Esempio n. 1
0
class FriendExist():
    """Collect the friends listed on the mobile Facebook friends-center page.

    Construction logs in through ``Launcher.login_mobile()``, opens the
    friends page and clicks the link below the list once (best effort).
    ``get_friend_exist`` reads the rows currently in the DOM and then shuts
    the browser down, so an instance can be scraped only once.
    """

    # Profile link of one friend row, relative to the row element.
    _LINK_XPATH = './table/tbody/tr/td[2]/a'

    def __init__(self, username, password):
        """Log in and open the friends page.

        ``Launcher`` and ``Es_fb`` are defined elsewhere in the project.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login_mobile()
        time.sleep(2)
        self.driver.get('https://m.facebook.com/friends/center/friends')
        time.sleep(3)

        # "Load more": the link is not always present, so a failed lookup or
        # click is ignored on purpose.
        try:
            self.driver.find_element_by_xpath(
                '//div[@id="friends_center_main"]/div[2]/a').click()
        except Exception:
            pass

        self.es = Es_fb()
        self.friends_list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def get_friend_exist(self):
        """Return one dict per friend row and quit the browser.

        Each dict has the keys ``uid``, ``nick_name`` and ``profile_url``.
        Lookup errors propagate to the caller, but the driver is always
        quit first so a failed scrape does not leak a browser process.
        """
        try:
            for row in self.driver.find_elements_by_xpath(
                    '//div[@id="friends_center_main"]/div[2]/div'):
                # Name and id both come from the same anchor: look it up once.
                link = row.find_element_by_xpath(self._LINK_XPATH)
                name = link.text
                user_id = ''.join(
                    re.findall(r'uid=(\d+)', link.get_attribute('href')))
                self.friends_list.append({
                    'uid': user_id,
                    'nick_name': name,
                    'profile_url':
                        'https://m.facebook.com/profile.php?id=' + str(user_id),
                })
        finally:
            self.driver.quit()
        return self.friends_list

    def save(self, indexName, typeName, friends_exist_list):
        """Hand ``friends_exist_list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, friends_exist_list)
Esempio n. 2
0
class Friend():
    """Scrape the friend list shown on the logged-in user's desktop profile.

    Construction logs in, opens the profile's third timeline tab and scrolls
    so more entries are rendered. ``get_friend`` then reads the entries and
    tears down both the browser and the display returned by the launcher.
    """

    # Overlay that can cover the page after navigation (notification prompt).
    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    # Name/profile anchor of one friend entry, relative to the <li>.
    _LINK_XPATH = './div/div/div[2]/div/div[2]/div/a'

    def __init__(self, username, password):
        """Log in and navigate to the friends tab.

        ``Launcher`` and ``Es_fb`` are defined elsewhere in the project;
        ``login()`` is unpacked here as a ``(driver, display)`` pair.
        """
        self.launcher = Launcher(username, password)
        self.driver, self.display = self.launcher.login()
        time.sleep(2)
        self._dismiss_popup()
        self.driver.find_element_by_xpath('//a[@title="个人主页"]').click()
        time.sleep(3)
        self._dismiss_popup()
        self.driver.find_element_by_xpath(
            '//ul[@data-referrer="timeline_light_nav_top"]/li[3]/a').click()
        time.sleep(3)
        self._dismiss_popup()

        # Load more: scroll 50 times. The offset doubles on every pass
        # (100, 200, 400, ...), so it quickly exceeds the page height.
        length = 100
        for _ in range(50):
            self.driver.execute_script(
                "var q=document.documentElement.scrollTop=" + str(length))
            time.sleep(1)
            length += length

        self.es = Es_fb()
        self.list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def _dismiss_popup(self):
        """Click the overlay if it is present; ignore it otherwise."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    def get_friend(self):
        """Return one dict per readable friend entry, then release resources.

        Entries whose markup cannot be read are skipped. Previously such an
        entry either raised UnboundLocalError (first entry) or re-appended
        the previous friend's data under a new row.
        """
        try:
            for entry in self.driver.find_elements_by_xpath(
                    '//div[@class="_5h60 _30f"]//ul//li'):
                try:
                    pic_url = entry.find_element_by_xpath(
                        './div/a/img').get_attribute('src')
                    link = entry.find_element_by_xpath(self._LINK_XPATH)
                    name = link.text
                    user_id = ''.join(re.findall(
                        r'id=(\d+)', link.get_attribute('data-hovercard')))
                    friends = entry.find_element_by_xpath(
                        './div/div/div[2]/div/div[2]/a').text
                    profile_url = link.get_attribute('href') + '&sk=about'
                except Exception:
                    continue
                self.list.append({
                    'uid': user_id,
                    'photo_url': pic_url,
                    'nick_name': name,
                    'friends': friends,
                    'profile_url': profile_url,
                    'update_time': self.update_time,
                })
        finally:
            # Kill the display even when quitting the browser fails.
            try:
                self.driver.quit()
            finally:
                self.display.popen.kill()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 3
0
class Share():
    """Scrape the re-share posts shown for the first URL of the share list.

    The share list comes from ``Launcher.get_share_list()``; only its first
    entry is visited. The browser is left open after scraping.
    """

    # Author anchor of one re-share post, relative to the post element.
    _AUTHOR_XPATH = ('./div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]'
                     '/div/div/div[2]/h5/span/span/span/a')

    def __init__(self, username, password):
        """Log in and fetch the share list.

        ``Launcher`` and ``Es_fb`` are defined elsewhere in the project.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.list = []
        self.share_list = self.launcher.get_share_list()

    def get_share(self):
        """Return one dict per re-share post on the first share URL.

        Keys: ``nick_name``, ``uid`` (the list of id matches, not a joined
        string), ``photo_url``, ``text`` (the string 'None' when the post has
        no paragraph) and ``timestamp``. An empty share list yields the
        current, normally empty, result list instead of an IndexError.
        """
        if not self.share_list:
            return self.list

        self.driver.get(self.share_list[0])
        for group in self.driver.find_elements_by_xpath(
                '//div[@id="repost_view_permalink"]/div/div[1]/div'):
            for post in group.find_elements_by_xpath('./div'):
                # Name and id come from the same anchor: look it up once.
                author = post.find_element_by_xpath(self._AUTHOR_XPATH)
                author_name = author.text
                author_id = re.findall(
                    r'id=(\d+)', author.get_attribute('data-hovercard'))
                pic_url = post.find_element_by_xpath(
                    './div/div[2]/div/div[2]/div/div/a/div/img'
                ).get_attribute('src')
                try:
                    content = post.find_element_by_xpath(
                        './div/div[2]/div/div[2]/div[2]//p').text
                except Exception:
                    content = 'None'
                timestamp = int(
                    post.find_element_by_xpath(
                        './div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr'
                    ).get_attribute('data-utime'))
                self.list.append({
                    'nick_name': author_name,
                    'uid': author_id,
                    'photo_url': pic_url,
                    'text': content,
                    'timestamp': timestamp,
                })
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 4
0
class Message():
    """Scrape the latest message of every conversation in desktop Messenger."""

    def __init__(self, username, password):
        """Log in; ``Launcher`` and ``Es_fb`` are defined elsewhere."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.list = []

    def get_list(self):
        """Return ``name``/``pic``/``message_url`` for each listed conversation."""
        self.driver.get('https://www.facebook.com/messages/t/')
        sx_list = []
        for row in self.driver.find_elements_by_xpath('//ul[@aria-label="对话列表"]/li'):
            author_name = row.find_element_by_xpath('./div/a/div[2]/div[1]/span').text
            pic_url = row.find_element_by_xpath(
                './div/a/div[1]/div/div/div//img').get_attribute('src')
            message_url = row.find_element_by_xpath('./div/a').get_attribute('data-href')
            sx_list.append({'name': author_name, 'pic': pic_url, 'message_url': message_url})
        return sx_list

    def get_message(self):
        """Return one record per conversation.

        ``text`` is the text of the last message bubble that could be read,
        or the string 'None'. ``timestamp`` is parsed from the last ``<time>``
        element (year/month/day only) and falls back to the current time.
        """
        for sx in self.get_list():
            self.driver.get(sx['message_url'])
            time.sleep(1)

            # Reset per conversation: without this an empty conversation
            # raised UnboundLocalError or reused the previous one's text.
            mes = 'None'
            for bubble in self.driver.find_elements_by_xpath('//div[@class="_41ud"]'):
                try:
                    mes = bubble.find_element_by_xpath('./div/div/div/span').text
                except Exception:
                    mes = 'None'

            try:
                stamp = self.driver.find_elements_by_xpath(
                    '//div[@aria-label="消息"]//time')[-1].text
                # Unicode pattern so the CJK date markers also match the
                # unicode text Selenium returns on Python 2.
                ymd = '-'.join(re.findall(u'(\\d+)年(\\d+)月(\\d+)日', stamp)[0])
                timestamp = int(time.mktime(time.strptime(ymd, "%Y-%m-%d")))
            except Exception:
                timestamp = int(time.time())
            self.list.append({'nick_name': sx['name'], 'text': mes, 'timestamp': timestamp})
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 5
0
class Message():
    """Scrape desktop Messenger: one record per conversation (last bubble)."""

    # Overlay that can cover the page after navigation (notification prompt).
    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'

    def __init__(self, username, password):
        """Log in; ``Launcher`` and ``Es_fb`` are defined elsewhere."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    def _dismiss_popup(self):
        """Click the overlay if it is present; ignore it otherwise."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    def get_list(self):
        """Return the conversations that expose a message URL.

        Each dict has ``name``, ``pic``, ``message_url`` and ``author_id``;
        unreadable fields hold the string 'None'.
        """
        self.driver.get('https://www.facebook.com/messages/t/')
        self._dismiss_popup()

        sx_list = []
        for row in self.driver.find_elements_by_xpath('//ul[@aria-label="对话列表"]/li'):
            try:
                author_name = row.find_element_by_xpath('./div/a/div[2]/div[1]/span').text
            except Exception:
                author_name = 'None'
            try:
                author_id = ''.join(re.findall(
                    r'row_header_id_user:(\d+)',
                    row.find_element_by_xpath('./div').get_attribute('id')))
            except Exception:
                author_id = 'None'
            try:
                pic_url = row.find_element_by_xpath(
                    './div/a/div[1]/div/div/div//img').get_attribute('src')
            except Exception:
                pic_url = 'None'
            try:
                message_url = row.find_element_by_xpath('./div/a').get_attribute('data-href')
            except Exception:
                message_url = False
            if message_url:
                sx_list.append({'name': author_name, 'pic': pic_url,
                                'message_url': message_url, 'author_id': author_id})
        return sx_list

    @staticmethod
    def _bubble_time(bubble):
        """Parse a bubble's tooltip into epoch seconds (local time), else 0."""
        try:
            tooltip = bubble.find_element_by_xpath(
                './div/div').get_attribute('data-tooltip-content')
            # Unicode pattern so the CJK markers match unicode text on Python 2.
            ymd = '-'.join(re.findall(u'(\\d+)年(\\d+)月(\\d+)日', tooltip)[0])
            hm = ':'.join(re.findall(r'(\d+):(\d+)', tooltip)[0])
            return int(time.mktime(time.strptime(
                ymd + ' ' + hm + ':00', '%Y-%m-%d %H:%M:%S')))
        except Exception:
            return 0

    @staticmethod
    def _bubble_direction(bubble, author_id):
        """Return ``(private_type, text, root_text)`` for one bubble.

        The last ``fbid`` in the ``participants`` attribute is compared with
        the conversation's author id: a match is 'receive', otherwise 'make'.
        """
        try:
            sender = re.findall(
                r'"fbid:(\d+)"',
                bubble.find_element_by_xpath('./div/div').get_attribute('participants'))[-1]
            if sender == author_id:
                return 'receive', bubble.text, 'None'
            return 'make', 'None', bubble.text
        except Exception:
            return 'unknown', 'None', 'None'

    def get_message(self):
        """Return one record per conversation and close the browser window.

        The record describes the last bubble on the page. A conversation
        without bubbles gets the defaults (timestamp 0, 'unknown', 'None')
        instead of raising or reusing the previous conversation's values.
        """
        try:
            for sx in self.get_list():
                self.driver.get(sx['message_url'])
                time.sleep(1)
                self._dismiss_popup()

                messageTime = 0
                private_type, text, root_text = 'unknown', 'None', 'None'
                for bubble in self.driver.find_elements_by_xpath('//div[@class="_41ud"]'):
                    messageTime = self._bubble_time(bubble)
                    private_type, text, root_text = self._bubble_direction(
                        bubble, sx['author_id'])

                self.list.append({
                    'uid': sx['author_id'],
                    'photo_url': sx['pic'],
                    'nick_name': sx['name'],
                    'timestamp': messageTime,
                    'update_time': self.update_time,
                    'text': text,
                    'root_text': root_text,
                    'private_type': private_type,
                })
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 6
0
class Comment():
    """Scrape the comments under every post URL of the comment list.

    The URLs come from ``Launcher.get_comment_list()``. Facebook renders the
    same data under two slightly different DOM layouts, so most fields are
    read by trying two XPaths in turn.
    """

    # Overlay that can cover the page after navigation (notification prompt).
    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'

    _ROOT_TEXT_XPATHS = (
        '//div[@role="feed"]/div[1]/div[1]/div[2]/div[1]/div[2]/div[2]',
        '//div[@role="feed"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]',
    )
    _ROOT_LINK_XPATHS = (
        '//div[@role="feed"]/div[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/div/span[3]/span/a',
        '//div[@role="feed"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/div/span[3]/span/a',
    )
    _AUTHOR_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[1]/a',
        './div/div/div/div[2]/div/div/div/span/span[1]/a',
    )
    _PIC_XPATHS = ('./div/div/div/div[1]/a/img',)
    _CONTENT_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[2]/span/span/span/span',
        './div/div/div/div[2]/div/div/div/span/span[2]/span/span/span/span',
    )
    _TIME_XPATHS = (
        './div/div/div/div[2]/div/div/div[2]/span[4]/a/abbr',
        './div/div/div/div[2]/div/div/div[2]/span[5]/a/abbr',
    )

    def __init__(self, username, password):
        """Log in and fetch the URLs to scrape.

        ``Launcher`` and ``Es_fb`` are defined elsewhere in the project.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.comment_list = self.launcher.get_comment_list()
        self.list = []
        self.update_time = int(time.time())

    @staticmethod
    def _extract(scope, xpaths, reader, default):
        """Return ``reader(element)`` for the first XPath that works.

        A candidate counts as failed when either the lookup or ``reader``
        raises; ``default`` is returned when every candidate fails.
        """
        for xpath in xpaths:
            try:
                return reader(scope.find_element_by_xpath(xpath))
            except Exception:
                continue
        return default

    @staticmethod
    def _text(element):
        """Reader: the element's visible text."""
        return element.text

    @staticmethod
    def _story_id(element):
        """Reader: digits following ``story_fbid=`` in the element's href."""
        return ''.join(re.findall(r'story_fbid=(\d+)', element.get_attribute('href')))

    @staticmethod
    def _hovercard_id(element):
        """Reader: digits following ``id=`` in the ``data-hovercard`` attribute."""
        return ''.join(re.findall(r'id=(\d+)', element.get_attribute('data-hovercard')))

    @staticmethod
    def _src(element):
        """Reader: the element's ``src`` attribute."""
        return element.get_attribute('src')

    @staticmethod
    def _utime(element):
        """Reader: the ``data-utime`` attribute as an int."""
        return int(element.get_attribute('data-utime'))

    def get_comment(self):
        """Return one dict per comment and close the browser window.

        Unreadable string fields hold 'None'. Comment text and timestamp now
        fall back too ('None' and 0); before, a comment matching neither
        layout raised and aborted the whole scrape. ``mid`` and ``root_mid``
        both carry the id of the commented post.
        """
        try:
            for url in self.comment_list:
                print(url)
                self.driver.get(url)
                time.sleep(1)
                # Dismiss the notification overlay when it is shown.
                try:
                    self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
                except Exception:
                    pass

                root_text = self._extract(
                    self.driver, self._ROOT_TEXT_XPATHS, self._text, 'None')
                root_mid = self._extract(
                    self.driver, self._ROOT_LINK_XPATHS, self._story_id, 'None')

                for comment in self.driver.find_elements_by_xpath(
                        '//div[@aria-label="评论"]'):
                    self.list.append({
                        'uid': self._extract(
                            comment, self._AUTHOR_XPATHS, self._hovercard_id, 'None'),
                        'photo_url': self._extract(
                            comment, self._PIC_XPATHS, self._src, 'None'),
                        'nick_name': self._extract(
                            comment, self._AUTHOR_XPATHS, self._text, 'None'),
                        'mid': root_mid,
                        'timestamp': self._extract(
                            comment, self._TIME_XPATHS, self._utime, 0),
                        'text': self._extract(
                            comment, self._CONTENT_XPATHS, self._text, 'None'),
                        'update_time': self.update_time,
                        'root_text': root_text,
                        'root_mid': root_mid,
                    })
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 7
0
class Like():
    """Scrape who liked each post of the like list (mobile site layout).

    ``Launcher.get_like_list()`` is unpacked here as ``(urls, driver)``.
    """

    _STORY = '//div[@id="m_story_permalink_view"]'
    # Name/profile anchor of one liker row, relative to the <li>.
    _AUTHOR_XPATH = './table/tbody/tr/td/table/tbody/tr/td[3]/div/h3[1]/a'

    def __init__(self, username, password):
        """Log in; ``Launcher`` and ``Es_fb`` are defined elsewhere."""
        self.launcher = Launcher(username, password)
        self.like_urls_list, self.driver = self.launcher.get_like_list()
        self.es = Es_fb()
        self.like_list = []
        self.update_time = int(time.time())

    def date2timestamp(self, date):
        """Convert a Chinese Facebook date label to epoch seconds.

        Handles "N分钟" / "N小时" (relative to now) and "[Y年]M月D日"
        with an optional 上午/下午 time part, which is dropped. A label
        without a year is taken to be in the current year. The result is in
        local time with day precision.

        Raises ValueError if the remaining text is not a Y-M-D date.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        # Only the day is kept: cut everything from the AM/PM marker on.
        for marker in (u'上午', u'下午'):
            if marker in date:
                date = date.split(marker)[0]

        if u'分钟' in date:
            return int(time.time()) - int(re.search(r'(\d+)', date).group(1)) * 60
        if u'小时' in date:
            return int(time.time()) - int(re.search(r'(\d+)', date).group(1)) * 60 * 60

        if u'年' in date:
            date = date.replace(u'年', '-')
        else:
            date = time.strftime('%Y') + '-' + date
        return int(time.mktime(time.strptime(date, '%Y-%m-%d')))

    def get_like(self):
        """Return one dict per liker across all URLs, then quit the browser.

        Post-level fields fall back to 'None' / 0 when unreadable. A post
        without a likes link still raises, as before, but the driver is now
        quit in a ``finally`` so the browser is not leaked.
        """
        try:
            for url in self.like_urls_list:
                self.driver.get(url)
                time.sleep(1)

                try:
                    root_text = self.driver.find_element_by_xpath(
                        self._STORY + '/div[1]/div/div[1]/div[2]').text
                except Exception:
                    root_text = 'None'
                print(root_text)

                try:
                    timestamp = self.date2timestamp(
                        self.driver.find_element_by_xpath(
                            self._STORY + '/div[1]/div/div[2]/div[1]').text)
                except Exception:
                    timestamp = 0
                print(timestamp)

                try:
                    root_mid = re.search(r'fbid%3D(\d+)%', url).group(1)
                except Exception:
                    root_mid = 0
                print(root_mid)

                # Open the page listing the likers.
                self.driver.get(
                    self.driver.find_element_by_xpath(
                        self._STORY + '/div[2]/div/div[3]/a'
                    ).get_attribute('href'))
                time.sleep(5)

                for row in self.driver.find_elements_by_xpath(
                        '//div[@id="root"]/table/tbody/tr/td/div/ul/li'):
                    try:
                        author_name = row.find_element_by_xpath(
                            self._AUTHOR_XPATH).text
                    except Exception:
                        author_name = 'None'
                    print(author_name)

                    try:
                        author_id = ''.join(re.findall(
                            r'id=(\d+)',
                            row.find_element_by_xpath(
                                self._AUTHOR_XPATH).get_attribute('href')))
                    except Exception:
                        author_id = 0
                    try:
                        pic_url = row.find_element_by_xpath(
                            './table/tbody/tr/td/table/tbody/tr/td[1]/img'
                        ).get_attribute('src')
                    except Exception:
                        pic_url = 'None'

                    self.like_list.append({
                        'uid': author_id,
                        'photo_url': pic_url,
                        'nick_name': author_name,
                        'timestamp': timestamp,
                        'root_text': root_text,
                        'update_time': self.update_time,
                        'root_mid': root_mid,
                    })
        finally:
            self.driver.quit()
        return self.like_list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 8
0
class Message():
    """Scrape mobile Messenger: one record per conversation (last message)."""

    # Candidate locations of the profile picture on a mobile profile page.
    _PHOTO_XPATHS = (
        '//div[@id="m-timeline-cover-section"]/div[1]/div[2]/div[1]/div/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/div[1]/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/a/img',
    )
    # Conversation link inside one row of the conversation table.
    _THREAD_LINK_XPATH = './tbody/tr/td/div/h3[1]/a'

    def __init__(self, username, password):
        """Log in; ``Launcher`` and ``Es_fb`` are defined elsewhere."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login_mobile()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    @staticmethod
    def _to_24_hour(date, marker, is_pm):
        """Replace ``<marker>H:MM`` in ``date`` with `` H:MM`` on a 24h clock.

        下午1-11 become 13-23 and 上午12 becomes 0; other hours are kept.
        The hour is parsed with ``int`` rather than ``eval``, which is both
        unsafe on scraped text and fails on zero-padded hours such as "08".
        """
        pieces = date.split(marker)
        clock = pieces[1].split(':')
        hour = int(clock[0])
        if is_pm and hour < 12:
            hour += 12
        elif not is_pm and hour == 12:
            hour = 0
        return pieces[0] + ' ' + str(hour) + ':' + clock[1]

    def date2timestamp(self, date):
        """Convert a Chinese Facebook date label to epoch seconds (local time).

        Handles "刚刚" (now), "N分钟" / "N小时" (relative to now) and
        "[Y年]M月D日" with an optional 上午/下午 H:MM part. A label
        without a year is taken to be in the current year.

        Raises ValueError when the label matches none of these shapes.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        if date == u'刚刚':
            return int(time.time())
        if u'上午' in date:
            date = self._to_24_hour(date, u'上午', False)
        if u'下午' in date:
            date = self._to_24_hour(date, u'下午', True)

        is_relative = u'分钟' in date or u'小时' in date
        if not is_relative:
            if u'年' in date:
                date = date.replace(u'年', '-')
            else:
                date = time.strftime('%Y') + '-' + date
        if u'分钟' in date:
            return int(time.time()) - int(re.search(r'(\d+)', date).group(1)) * 60
        if u'小时' in date:
            return int(time.time()) - int(re.search(r'(\d+)', date).group(1)) * 60 * 60

        try:
            return int(time.mktime(time.strptime(date, '%Y-%m-%d')))
        except ValueError:
            return int(time.mktime(time.strptime(date, '%Y-%m-%d %H:%M')))

    def get_list(self):
        """Return the conversations that expose a link.

        Each dict has ``author_name``, ``message_url`` and ``author_id``.
        """
        self.driver.get('https://m.facebook.com/messages/t/')

        sx_list = []
        for row in self.driver.find_elements_by_xpath(
                '//div[@id="root"]/div[1]/div[2]/div/table'):
            try:
                author_name = row.find_element_by_xpath('./tbody/tr/td/div/h3[1]').text
            except Exception:
                author_name = 'None'
            print(author_name)

            # Id and URL both come from the same href: read it once.
            try:
                message_url = row.find_element_by_xpath(
                    self._THREAD_LINK_XPATH).get_attribute('href')
            except Exception:
                message_url = False
            try:
                author_id = ''.join(re.findall(r'%3A(\d+)#', message_url))
            except Exception:
                author_id = 'None'
            print(author_id)
            print(message_url)

            if message_url:
                sx_list.append({'author_name': author_name,
                                'message_url': message_url,
                                'author_id': author_id})
        return sx_list

    def _profile_photo(self):
        """Return the ``src`` of the profile picture on the open page, or 'None'."""
        for xpath in self._PHOTO_XPATHS:
            try:
                return self.driver.find_element_by_xpath(xpath).get_attribute('src')
            except Exception:
                continue
        return 'None'

    def get_message(self):
        """Return one record per conversation, then quit the browser.

        Each author's profile is visited for the picture URL first. The third
        picture fallback used to store the WebElement itself instead of its
        ``src``. The record then describes the last message read before one
        without a date is met; a conversation with no readable message gets
        timestamp 0, 'unknown' and empty texts instead of raising or reusing
        the previous conversation's values.
        """
        try:
            sx_list = self.get_list()
            for sx in sx_list:
                self.driver.get(
                    'https://m.facebook.com/profile.php?id=' + str(sx['author_id']))
                sx['photo_url'] = self._profile_photo()

            for sx in sx_list:
                self.driver.get(sx['message_url'])
                time.sleep(1)

                messageTime = 0
                private_type, text, root_text = 'unknown', '', ''
                for message in self.driver.find_elements_by_xpath(
                        '//div[@id="messageGroup"]/div/div'):
                    try:
                        date = message.find_element_by_xpath('./div[2]/abbr').text
                    except Exception:
                        break
                    print(date)

                    try:
                        messageTime = self.date2timestamp(date)
                    except Exception:
                        messageTime = 0
                    print(messageTime)

                    try:
                        # A sender link carrying an id marks a received message.
                        if re.findall(
                                r'id=(\d+)&',
                                message.find_element_by_xpath(
                                    './div[1]/a').get_attribute('href')):
                            private_type, text, root_text = 'receive', message.text, ''
                        else:
                            private_type, text, root_text = 'make', '', message.text
                    except Exception:
                        private_type, text, root_text = 'unknown', message.text, ''

                self.list.append({
                    'uid': sx['author_id'],
                    'photo_url': sx['photo_url'],
                    'nick_name': sx['author_name'],
                    'timestamp': messageTime,
                    'update_time': self.update_time,
                    'text': text,
                    'root_text': root_text,
                    'private_type': private_type,
                })
        finally:
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 9
0
class Like():
    """Scrape who liked each post of the like list (desktop site layout).

    The URLs come from ``Launcher.get_like_list()``.
    """

    # Overlay that can cover the page after navigation (notification prompt).
    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    # Name/profile anchor of one liker entry, relative to the <li>.
    _AUTHOR_XPATH = './div/div/div/div[1]/div[2]/div/a'
    # The post's timestamp <abbr> appears with either class list.
    _TIME_XPATHS = (
        '//abbr[@class="_5ptz"]',
        '//abbr[@class="_5ptz timestamp livetimestamp"]',
    )

    def __init__(self, username, password):
        """Log in; ``Launcher`` and ``Es_fb`` are defined elsewhere."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    def _dismiss_popup(self):
        """Click the overlay if it is present; ignore it otherwise."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    def _post_timestamp(self):
        """Return the open post's ``data-utime`` as an int, or 0."""
        for xpath in self._TIME_XPATHS:
            try:
                return int(self.driver.find_element_by_xpath(
                    xpath).get_attribute('data-utime'))
            except Exception:
                continue
        return 0

    def get_like(self):
        """Return one dict per liker across all URLs; close the window after.

        Unreadable fields hold 'None' (strings) or 0 (timestamp, post id).
        ``text`` and ``root_text`` both carry the liked post's text. A post
        without a likes link raises; the window is still closed.
        """
        try:
            for url in self.like_list:
                self.driver.get(url)
                time.sleep(1)
                self._dismiss_popup()

                try:
                    text = self.driver.find_element_by_xpath(
                        '//div[@class="_5pbx userContent _22jv _3576"]').text
                except Exception:
                    text = 'None'
                timestamp = self._post_timestamp()
                try:
                    mid = ''.join(re.findall(
                        r'/(\d+)',
                        self.driver.find_element_by_xpath(
                            '//a[@class="_5pcq"]').get_attribute('href')))
                except Exception:
                    mid = 0

                # Open the page listing the likers.
                self.driver.get(self.driver.find_element_by_xpath(
                    '//a[@class="_2x4v"]').get_attribute('href'))
                time.sleep(5)
                self._dismiss_popup()

                for entry in self.driver.find_elements_by_xpath('//li[@class="_5i_q"]'):
                    # Name and id come from the same anchor: look it up once.
                    try:
                        author = entry.find_element_by_xpath(self._AUTHOR_XPATH)
                    except Exception:
                        author = None
                    try:
                        author_name = author.text
                    except Exception:
                        author_name = 'None'
                    try:
                        author_id = ''.join(re.findall(
                            r'id=(\d+)', author.get_attribute('data-hovercard')))
                    except Exception:
                        author_id = 'None'
                    try:
                        pic_url = entry.find_element_by_xpath(
                            './div/a/div/img').get_attribute('src')
                    except Exception:
                        pic_url = 'None'

                    self.list.append({
                        'uid': author_id,
                        'photo_url': pic_url,
                        'nick_name': author_name,
                        'timestamp': timestamp,
                        'text': text,
                        'update_time': self.update_time,
                        'root_text': text,
                        'root_mid': mid,
                    })
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 10
0
class Friend():
    """Scrape the entries shown on the desktop friend-requests page.

    Construction logs in, opens ``/friends/requests`` and scrolls so more
    entries are rendered. ``get_friend`` reads them and quits the browser.
    """

    # Overlay that can cover the page after navigation (notification prompt).
    _POPUP_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    # Name/profile anchor of one entry, relative to the entry element.
    _LINK_XPATH = './div/div[2]/div[1]/a'

    def __init__(self, username, password):
        """Log in and open the friend-requests page.

        ``Launcher`` and ``Es_fb`` are defined elsewhere in the project.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        time.sleep(2)
        self._dismiss_popup()

        # Go to the friend-requests page.
        self.driver.get('https://www.facebook.com/friends/requests')
        time.sleep(3)
        self._dismiss_popup()

        # Load more: scroll down in 20 steps (offsets 100, 500, 900, ...).
        length = 100
        for _ in range(20):
            self.driver.execute_script(
                "var q=document.documentElement.scrollTop=" + str(length))
            time.sleep(1)
            length += 400

        self.es = Es_fb()
        self.list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def _dismiss_popup(self):
        """Click the overlay if it is present; ignore it otherwise."""
        try:
            self.driver.find_element_by_xpath(self._POPUP_XPATH).click()
        except Exception:
            pass

    def get_friend(self):
        """Return one dict per readable entry, then quit the browser.

        Entries that cannot be read are printed and skipped. ``friends`` is
        always None because this page does not show a friend count.
        """
        try:
            for entry in self.driver.find_elements_by_xpath(
                    '//div[@id="globalContainer"]/div/div/div/div/div[3]/div'):
                try:
                    pic_url = entry.find_element_by_xpath(
                        './a/div/img').get_attribute('src')
                    # Name, id and URL come from the same anchor.
                    link = entry.find_element_by_xpath(self._LINK_XPATH)
                    name = link.text
                    user_id = ''.join(re.findall(
                        r'id=(\d+)', link.get_attribute('data-hovercard')))
                    profile_url = link.get_attribute('href')
                    self.list.append({
                        'uid': user_id,
                        'photo_url': pic_url,
                        'nick_name': name,
                        'friends': None,
                        'profile_url': profile_url,
                        'update_time': self.update_time,
                    })
                except Exception as e:
                    print(e)
        finally:
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 11
0
class Mention():
    """Scrape the post shown on every URL of the mention list.

    The URLs come from ``Launcher.get_mention_list()``. Facebook renders the
    post under two slightly different DOM layouts, so most fields are read by
    trying two XPaths in turn. The browser is left open after scraping.
    """

    _AUTHOR_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
        './div/div/div/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
    )
    _PIC_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
    )
    _TIME_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/span/a/abbr',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr',
    )
    _CONTENT_XPATHS = (
        './div/div/div/div/div/div/div[2]/div/div[2]/div[2]/p',
    )

    def __init__(self, username, password):
        """Log in and fetch the URLs to scrape.

        ``Launcher`` and ``Es_fb`` are defined elsewhere in the project.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.mention_list = self.launcher.get_mention_list()
        self.es = Es_fb()
        self.list = []

    @staticmethod
    def _extract(scope, xpaths, reader, default):
        """Return ``reader(element)`` for the first XPath that works.

        A candidate counts as failed when either the lookup or ``reader``
        raises; ``default`` is returned when every candidate fails.
        """
        for xpath in xpaths:
            try:
                return reader(scope.find_element_by_xpath(xpath))
            except Exception:
                continue
        return default

    def get_mention(self):
        """Return one dict per content area found on the mention URLs.

        Keys: ``nick_name``, ``uid``, ``photo_url``, ``timestamp``, ``text``.
        A field that matches neither layout now falls back to 'None' (0 for
        the timestamp); before, the second lookup was unguarded and one
        unexpected page aborted the whole scrape.
        """
        for url in self.mention_list:
            print(url)
            self.driver.get(url)
            for area in self.driver.find_elements_by_xpath(
                    '//div[@id="contentArea"]'):
                self.list.append({
                    'nick_name': self._extract(
                        area, self._AUTHOR_XPATHS,
                        lambda el: el.text, 'None'),
                    'uid': self._extract(
                        area, self._AUTHOR_XPATHS,
                        lambda el: ''.join(re.findall(
                            r'id=(\d+)', el.get_attribute('data-hovercard'))),
                        'None'),
                    'photo_url': self._extract(
                        area, self._PIC_XPATHS,
                        lambda el: el.get_attribute('src'), 'None'),
                    'timestamp': self._extract(
                        area, self._TIME_XPATHS,
                        lambda el: int(el.get_attribute('data-utime')), 0),
                    'text': self._extract(
                        area, self._CONTENT_XPATHS,
                        lambda el: el.text, 'None'),
                })
        return self.list

    def save(self, indexName, typeName, list):
        """Hand ``list`` to ``self.es.executeES`` unchanged."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 12
0
class Mention():
    """Scrape the pages returned by ``Launcher.get_mention_list()``.

    ``get_mention`` builds one record per ``contentArea`` container found on
    each page.  A field that cannot be read from the page is stored as the
    string ``'None'``.
    """

    # Overlay that may cover the page right after it has been opened.
    _OVERLAY_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'

    # XPaths relative to a ``contentArea`` node.  Each tuple lists the layout
    # variants of one element in the order they are tried.
    _AUTHOR_LINK_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
        './div/div/div/div/div/div/div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a',
    )
    _AVATAR_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/a/div/img',
    )
    _TIME_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/span/a/abbr',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr',
    )
    _CONTENT_XPATHS = (
        './div/div/div/div/div/div/div[2]/div/div[2]/div[2]/p',
    )
    _PERMALINK_XPATHS = (
        './div/div/div[3]/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/span/a',
        './div/div/div/div/div/div/div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a',
    )

    def __init__(self, username, password):
        """Log in through ``Launcher`` and fetch the list of URLs to visit."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.mention_list = self.launcher.get_mention_list()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    @staticmethod
    def _first_value(node, xpaths, extract, default='None'):
        """Return ``extract(element)`` for the first XPath that works.

        A variant counts as failed when either the lookup or ``extract``
        raises; ``default`` is returned once every variant has failed.
        """
        for xpath in xpaths:
            try:
                return extract(node.find_element_by_xpath(xpath))
            except Exception:
                continue
        return default

    def _dismiss_overlay(self):
        """Click the notification overlay away if there is one (best effort)."""
        try:
            self.driver.find_element_by_xpath(self._OVERLAY_XPATH).click()
        except Exception:
            # No overlay on this page, or it is not clickable: carry on.
            pass

    def _parse_post(self, post):
        """Build the record dictionary for one ``contentArea`` node."""
        read = self._first_value
        author_name = read(post, self._AUTHOR_LINK_XPATHS,
                           lambda link: link.text)
        author_id = read(
            post, self._AUTHOR_LINK_XPATHS,
            lambda link: ''.join(
                re.findall(r'id=(\d+)', link.get_attribute('data-hovercard'))))
        pic_url = read(post, self._AVATAR_XPATHS,
                       lambda img: img.get_attribute('src'))
        ti = read(post, self._TIME_XPATHS,
                  lambda abbr: int(abbr.get_attribute('data-utime')))
        content = read(post, self._CONTENT_XPATHS,
                       lambda paragraph: paragraph.text)
        # Every "/<digits>" group of the permalink is concatenated.
        mid = read(
            post, self._PERMALINK_XPATHS,
            lambda link: ''.join(
                re.findall(r'/(\d+)', link.get_attribute('href'))))
        return {
            'uid': author_id,
            'photo_url': pic_url,
            'nick_name': author_name,
            'mid': mid,
            'timestamp': ti,
            'text': content,
            'update_time': self.update_time
        }

    def get_mention(self):
        """Visit every URL in ``self.mention_list`` and collect the records.

        The records are appended to ``self.list``, which is also the return
        value.  The browser window is closed when the method ends, whether
        it finished normally or raised.
        """
        try:
            for url in self.mention_list:
                self.driver.get(url)
                time.sleep(1)
                # Dismiss the notification pop-up to get to the page itself.
                self._dismiss_overlay()
                for post in self.driver.find_elements_by_xpath(
                        '//div[@id="contentArea"]'):
                    self.list.append(self._parse_post(post))
        finally:
            self.driver.close()
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 13
0
class Mention():
    """Scrape the mobile pages returned by ``Launcher.get_mention_list()``.

    ``get_mention`` produces one record per URL and afterwards opens each
    author's ``m.facebook.com`` profile page to add the avatar URL.  Fields
    that cannot be read fall back to ``''`` (``0`` for the timestamp).
    """

    _AUTHOR_LINK_XPATH = '//div[@id="root"]/div[1]/div[1]/div/div[1]/div[1]/table/tbody/tr/td[2]/div/h3/strong/a'
    _TIME_XPATH = '//div[@id="root"]/div[1]/div[1]/div/div[2]/div/abbr'
    _TEXT_XPATH = '//div[@id="root"]/div[1]/div[1]/div/div[1]/div[2]'
    # Layout variants of the profile picture, tried in this order.
    _PHOTO_XPATHS = (
        '//div[@id="m-timeline-cover-section"]/div[1]/div[2]/div[1]/div/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/div[1]/a/img',
        '//div[@id="m-timeline-cover-section"]/div[2]/div/div[1]/a/img',
    )

    def __init__(self, username, password):
        """Create the launcher and obtain the URL list together with its driver."""
        self.launcher = Launcher(username, password)
        self.mention_list, self.driver = self.launcher.get_mention_list()
        self.es = Es_fb()
        self.list = []
        self.update_time = int(time.time())

    def date2timestamp(self, date):
        """Convert a Chinese date label into a Unix timestamp (local time).

        Handled forms, after ``月`` is turned into ``-`` and ``日`` and
        spaces are dropped:

        * ``刚刚``: the current time;
        * labels containing ``分钟`` / ``小时``: the current time minus the
          first number in the label, in minutes / hours;
        * ``[YYYY年]M月D日`` optionally followed by ``上午H:MM`` or
          ``下午H:MM``; a label without ``年`` is placed in the current year.

        Raises ``ValueError`` when the label matches none of these forms.
        """
        date = date.replace(u'月', '-').replace(u'日', '').replace(' ', '')
        if date == u'刚刚':
            return int(time.time())

        # Relative labels carry a single number: how long ago.
        if u'分钟' in date:
            minutes = int(re.search(r'(\d+)', date).group(1))
            return int(time.time()) - minutes * 60
        if u'小时' in date:
            hours = int(re.search(r'(\d+)', date).group(1))
            return int(time.time()) - hours * 60 * 60

        # Rewrite the 12-hour clock as 24-hour.  The hour is parsed with
        # int() rather than eval(): the text comes from a web page.
        for marker, offset in ((u'上午', 0), (u'下午', 12)):
            if marker in date:
                day_part, clock = date.split(marker)[:2]
                hour_text, minute_text = clock.split(':')[:2]
                # 12 o'clock is hour 0 of its half-day (12 AM -> 0, 12 PM -> 12).
                hour = int(hour_text) % 12 + offset
                date = day_part + ' ' + str(hour) + ':' + minute_text

        if u'年' in date:
            date = date.replace(u'年', '-')
        else:
            date = time.strftime('%Y', time.localtime(time.time())) + '-' + date

        try:
            parsed = time.strptime(date, '%Y-%m-%d')
        except ValueError:
            parsed = time.strptime(date, '%Y-%m-%d %H:%M')
        return int(time.mktime(parsed))

    def _lookup(self, xpath, extract, default=''):
        """Return ``extract(element at xpath)``, or ``default`` if that fails."""
        try:
            return extract(self.driver.find_element_by_xpath(xpath))
        except Exception:
            return default

    def _scrape_post(self, url):
        """Read one record from the page currently loaded for ``url``."""
        nick_name = self._lookup(self._AUTHOR_LINK_XPATH,
                                 lambda link: link.text)
        print(nick_name)

        uid = self._lookup(
            self._AUTHOR_LINK_XPATH,
            lambda link: re.findall(r'id=(\d+)',
                                    link.get_attribute('href'))[0])
        print(uid)

        timestamp = self._lookup(
            self._TIME_XPATH,
            lambda abbr: self.date2timestamp(abbr.text),
            default=0)
        print(timestamp)

        text = self._lookup(self._TEXT_XPATH, lambda node: node.text)
        print(text)

        try:
            mid = ''.join(re.findall(r'fbid%3D(\d+)', url))
        except Exception:
            mid = ''
        print(mid)

        return {
            'uid': uid,
            'nick_name': nick_name,
            'mid': mid,
            'timestamp': timestamp,
            'text': text,
            'update_time': self.update_time
        }

    def _current_photo_url(self):
        """Return the avatar URL of the loaded profile page, or ``''``."""
        for xpath in self._PHOTO_XPATHS:
            try:
                return self.driver.find_element_by_xpath(
                    xpath).get_attribute('src')
            except Exception:
                continue
        # None of the known layouts matched; keep the record rather than
        # aborting the whole run.
        return ''

    def get_mention(self):
        """Scrape every URL in ``self.mention_list``, then add avatar URLs.

        Records are appended to ``self.list``, which is also returned.  The
        driver is quit when the method ends, even if scraping raised.
        """
        try:
            for url in self.mention_list:
                self.driver.get(url)
                time.sleep(1)
                self.list.append(self._scrape_post(url))

            for record in self.list:
                self.driver.get('https://m.facebook.com/profile.php?id=' +
                                str(record['uid']))
                record['photo_url'] = self._current_photo_url()
        finally:
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, mention_list):
        """Forward ``mention_list`` to ``self.es.executeES``."""
        self.es.executeES(indexName, typeName, mention_list)
Esempio n. 14
0
class Share():
    """Scrape the pages returned by ``Launcher.get_share_list()``.

    One record is produced per URL.  Part of each record comes from the
    rendered page, part from the URL itself and part from the raw page
    source.  A field that cannot be read is stored as ``''``.
    """

    _OVERLAY_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'
    _AUTHOR_LINK_XPATH = '//table[@role="presentation"]/tbody/tr/td[2]/div/h3/strong/a'
    _CONTENT_XPATH = '/html/body/div/div/div[2]/div/div[1]/div[1]/div/div[1]/div[2]'
    _ROOT_TEXT_XPATH = '/html/body/div/div/div[2]/div/div[1]/div[1]/div/div[1]/div[3]/div[2]/div/div/div[2]'

    def __init__(self, username, password):
        """Create the launcher and obtain the URL list together with its driver."""
        self.launcher = Launcher(username, password)
        self.es = Es_fb()
        self.list = []
        self.share_list, self.driver = self.launcher.get_share_list()
        self.update_time = int(time.time())

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first ``pattern`` match in ``text``, or ``''``."""
        found = re.search(pattern, text)
        return found.group(1) if found else ''

    def _text_of(self, xpath):
        """Return the text of the element at ``xpath``, or ``''`` if absent."""
        try:
            return self.driver.find_element_by_xpath(xpath).text
        except Exception:
            return ''

    def _scrape_share(self, url):
        """Build the record for the page currently loaded for ``url``."""
        page = self.driver.page_source
        self.driver.save_screenshot('get_share000.png')

        author_name = self._text_of(self._AUTHOR_LINK_XPATH)
        print(author_name)

        author_id = self._first_group(r'id%3D(\d+)&', url)
        print(author_id)

        content = self._text_of(self._CONTENT_XPATH)

        # The publish time is searched in the source with blanks, newlines
        # and tabs removed.
        compact_page = page.replace(' ', '').replace('\n', '').replace('\t', '')
        publish_time = self._first_group(
            r'&quot;publish_time&quot;:(\d+),', compact_page)
        timestamp = int(publish_time) if publish_time else ''
        print(timestamp)

        mid = self._first_group(r'fbid%3D(\d+)%', url)
        print(mid)

        root_mid = self._first_group(
            r'&quot;original_content_id&quot;:&quot;(\d+)&quot;', page)
        print(root_mid)

        root_text = self._text_of(self._ROOT_TEXT_XPATH).replace(
            ' ', '').replace('\n', '').replace('\t', '')
        print(root_text)

        return {
            'uid': author_id,
            'nick_name': author_name,
            'mid': mid,
            'timestamp': timestamp,
            'text': content,
            'update_time': self.update_time,
            'root_text': root_text,
            'root_mid': root_mid
        }

    def get_share(self, page_wait=120):
        """Visit every URL in ``self.share_list`` and collect one record each.

        ``page_wait`` is the number of seconds to sleep after opening each
        URL; the default keeps the previous fixed delay of 120 seconds.

        Records are appended to ``self.list``, which is also returned.  The
        driver is quit when the method ends, even if scraping raised.
        """
        try:
            for url in self.share_list:
                self.driver.get(url)
                time.sleep(page_wait)
                # Dismiss the notification pop-up to get to the page itself.
                try:
                    self.driver.find_element_by_xpath(
                        self._OVERLAY_XPATH).click()
                except Exception:
                    pass
                self.list.append(self._scrape_share(url))
        finally:
            self.driver.quit()
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 15
0
class Like():
    """Scrape the rows of the liker list behind each URL from ``Launcher``.

    For every URL the original post (the "root") is read first, then the
    page linked from the ``_2x4v`` anchor is opened and one record is built
    per ``li._5i_q`` row on it.
    """

    def __init__(self, username, password):
        """Log in through ``Launcher`` and fetch the list of URLs to visit."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.like_list = self.launcher.get_like_list()
        self.es = Es_fb()
        self.list = []

    def _first_of(self, xpaths, extract):
        """Return ``extract(element)`` for the first XPath variant that works.

        Every variant but the last is tried leniently; the last one is not
        guarded, so its exception propagates when no variant matches.
        """
        for xpath in xpaths[:-1]:
            try:
                return extract(self.driver.find_element_by_xpath(xpath))
            except Exception:
                continue
        return extract(self.driver.find_element_by_xpath(xpaths[-1]))

    @staticmethod
    def _hovercard_id(link):
        """Join every ``id=<digits>`` group of the ``data-hovercard`` attribute."""
        return ''.join(
            re.findall(r'id=(\d+)', link.get_attribute('data-hovercard')))

    def get_like(self):
        """Visit every URL in ``self.like_list`` and collect the liker rows.

        Records are appended to ``self.list``, which is also returned.  A
        missing root text or relationship label is stored as ``'None'``;
        any other missing element raises.
        """
        for url in self.like_list:
            self.driver.get(url)
            root_name = self._first_of(
                ('//span[@class="fwb"]', '//span[@class="fwb fcg"]'),
                lambda span: span.text)
            root_id = self._first_of(
                ('//span[@class="fwb"]/a', '//span[@class="fwb fcg"]/a'),
                self._hovercard_id)
            try:
                root_content = self.driver.find_element_by_xpath(
                    '//div[@class="_5pbx userContent _22jv _3576"]/p').text
            except Exception:
                root_content = 'None'
            timestamp = self._first_of(
                ('//abbr[@class="_5ptz"]',
                 '//abbr[@class="_5ptz timestamp livetimestamp"]'),
                lambda abbr: int(abbr.get_attribute('data-utime')))

            # Follow the link found on the post page, then wait before
            # reading the rows.
            self.driver.get(
                self.driver.find_element_by_xpath(
                    '//a[@class="_2x4v"]').get_attribute('href'))
            time.sleep(10)

            for row in self.driver.find_elements_by_xpath(
                    '//li[@class="_5i_q"]'):
                profile_link = row.find_element_by_xpath(
                    './div/div/div/div[1]/div[2]/div/a')
                author_name = profile_link.text
                author_id = self._hovercard_id(profile_link)
                pic_url = row.find_element_by_xpath(
                    './div/a/div/img').get_attribute('src')
                try:
                    relationship = row.find_element_by_xpath(
                        './div/div/div/div[2]/div[2]/span/div/a/span[2]/span'
                    ).text
                except Exception:
                    relationship = "None"
                self.list.append({
                    'nick_name': author_name,
                    'uid': author_id,
                    'photo_url': pic_url,
                    'facebook_type': relationship,
                    'root_name': root_name,
                    'id': root_id,
                    'root_content': root_content,
                    'timestamp': timestamp
                })
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 16
0
class Friend():
    """Scrape the friend entries shown on the logged-in account's profile.

    The constructor drives the browser to the list and scrolls it to the
    bottom; ``get_friend`` then reads one record per list item.
    """

    def __init__(self, username, password):
        """Log in, open the friend tab of the profile and scroll it through.

        NOTE(review): the wait loop below has no timeout; it blocks for as
        long as the injected script has not appended ``scroll-done`` to the
        page title.
        """
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        time.sleep(2)
        self.driver.find_element_by_xpath('//a[@title="个人主页"]').click()
        time.sleep(3)
        self.driver.find_element_by_xpath(
            '//ul[@data-referrer="timeline_light_nav_top"]/li[3]/a').click()
        time.sleep(1)
        # Scroll down in steps of 100px every 150ms until the bottom is
        # reached, then jump back to the top and flag completion in the
        # document title.
        self.driver.execute_script("""
			(function () {
			var y = 0;
			var step = 100;
			window.scroll(0, 0);
			function f() {
			if (y < document.body.scrollHeight) {
			y += step;
			window.scroll(0, y);
			setTimeout(f, 150);
			} else {
			window.scroll(0, 0);
			document.title += "scroll-done";
			}
			}
			setTimeout(f, 1500);
			})();
			""")
        time.sleep(3)
        # Poll the title until the script reports that it has finished.
        while "scroll-done" not in self.driver.title:
            time.sleep(3)
        self.data_gt = self.driver.find_element_by_xpath(
            '//div[@id="contentArea"]/div[1]').get_attribute('data-gt')
        self.root_uid = json.loads(self.data_gt)['profile_owner']
        self.es = Es_fb()
        self.list = []
        self.current_ts = int(time.time())
        self.update_time = self.current_ts

    def get_friend(self):
        """Return one record per friend entry that could be read completely.

        An entry for which any element is missing is skipped.  Previously
        the error was swallowed and a record was appended anyway, built from
        whatever the preceding entry had left in the local variables (or
        failing with ``UnboundLocalError`` on the very first entry).

        Records are appended to ``self.list``, which is also returned.
        """
        for entry in self.driver.find_elements_by_xpath(
                '//div[@class="_5h60 _30f"]//ul//li'):
            try:
                pic_url = entry.find_element_by_xpath(
                    './div/a/img').get_attribute('src')
                profile_link = entry.find_element_by_xpath(
                    './div/div/div[2]/div/div[2]/div/a')
                name = profile_link.text
                user_id = ''.join(
                    re.findall(r'id=(\d+)',
                               profile_link.get_attribute('data-hovercard')))
            except Exception:
                # Not a regular friend entry; nothing reliable to record.
                continue
            self.list.append({
                'root_uid': self.root_uid,
                'photo_url': pic_url,
                'nick_name': name,
                'uid': user_id,
                'update_time': self.update_time
            })
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 17
0
class Comment():
    """Scrape the comment nodes on each page from ``Launcher.get_comment_list()``.

    One record is produced per ``div[aria-label="评论"]`` node.  Most elements
    exist in two layout variants; the second is used when the first fails.
    """

    _ROOT_CONTENT_XPATHS = (
        '//div[@role="feed"]/div[1]/div[1]/div[2]/div[1]/div[2]/div[2]',
        '//div[@role="feed"]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]',
    )
    _ROOT_TIME_XPATHS = (
        '//abbr[@class="_5ptz"]',
        '//abbr[@class="_5ptz timestamp livetimestamp"]',
    )
    # The paths below are relative to one comment node.
    _AUTHOR_LINK_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[1]/a',
        './div/div/div/div[2]/div/div/div/span/span[1]/a',
    )
    _AVATAR_XPATH = './div/div/div/div[1]/a/img'
    _CONTENT_XPATHS = (
        './div/div/div/div[2]/div/div/div/div/div/span/span[2]/span/span/span/span',
        './div/div/div/div[2]/div/div/div/span/span[2]/span/span/span/span',
    )
    _TIME_XPATHS = (
        './div/div/div/div[2]/div/div/div[2]/span[4]/a/abbr',
        './div/div/div/div[2]/div/div/div[2]/span[5]/a/abbr',
    )

    def __init__(self, username, password):
        """Log in through ``Launcher`` and fetch the list of URLs to visit."""
        self.launcher = Launcher(username, password)
        self.driver = self.launcher.login()
        self.es = Es_fb()
        self.comment_list = self.launcher.get_comment_list()
        self.list = []

    @staticmethod
    def _first_of(node, xpaths, extract):
        """Return ``extract(element)`` for the first XPath variant that works.

        Every variant but the last is tried leniently; the last one is not
        guarded, so its exception propagates when no variant matches.
        """
        for xpath in xpaths[:-1]:
            try:
                return extract(node.find_element_by_xpath(xpath))
            except Exception:
                continue
        return extract(node.find_element_by_xpath(xpaths[-1]))

    def _parse_comment(self, node):
        """Build the record dictionary for one comment node."""
        author_name = self._first_of(node, self._AUTHOR_LINK_XPATHS,
                                     lambda link: link.text)
        author_id = self._first_of(
            node, self._AUTHOR_LINK_XPATHS,
            lambda link: ''.join(
                re.findall(r'id=(\d+)', link.get_attribute('data-hovercard'))))
        pic_url = node.find_element_by_xpath(
            self._AVATAR_XPATH).get_attribute('src')
        content = self._first_of(node, self._CONTENT_XPATHS,
                                 lambda span: span.text)
        ti = self._first_of(
            node, self._TIME_XPATHS,
            lambda abbr: int(abbr.get_attribute('data-utime')))
        return {
            'nick_name': author_name,
            'uid': author_id,
            'photo_url': pic_url,
            'text': content,
            'timestamp': ti
        }

    def get_comment(self):
        """Visit every URL in ``self.comment_list`` and collect the comments.

        Records are appended to ``self.list``, which is also returned.  A
        comment element that matches none of its layout variants raises.
        """
        for url in self.comment_list:
            print(url)
            self.driver.get(url)
            time.sleep(1)
            # The root post's text and time are not stored in the records.
            # The lookups are kept because they raise, and so stop the run,
            # when the page shows neither known layout.
            self._first_of(self.driver, self._ROOT_CONTENT_XPATHS,
                           lambda node: node.text)
            self._first_of(self.driver, self._ROOT_TIME_XPATHS,
                           lambda abbr: abbr.get_attribute('data-utime'))
            for node in self.driver.find_elements_by_xpath(
                    '//div[@aria-label="评论"]'):
                self.list.append(self._parse_comment(node))
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names."""
        self.es.executeES(indexName, typeName, list)
Esempio n. 18
0
class Share():
    """Scrape the feed entries on each page from ``Launcher.get_share_list()``.

    One record is produced per ``./div`` child of every
    ``div[role="feed"]/div`` node.  A field that cannot be read is stored as
    the string ``'None'``.
    """

    _OVERLAY_XPATH = '//div[@class="_n8 _3qx uiLayer _3qw"]'

    # The paths below are relative to one feed entry.
    _AUTHOR_LINK_XPATH = './div[2]/div[1]/div[2]/div[1]/div/div/div[2]/div/div/div[2]/h5/span/span/span/a'
    _AVATAR_XPATH = './div[2]/div/div[2]/div/div/a/div/img'
    _CONTENT_XPATH = './div[2]/div/div[2]/div[2]'
    _TIME_XPATHS = (
        './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a/abbr',
        './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[2]/span/a/abbr',
    )
    _PERMALINK_XPATH = './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/div/span[3]/span/a'
    _ROOT_LINK_XPATH = './div[2]/div/div[2]/div/div/div/div[2]/div/div/div[2]/h5/span/span/a'

    def __init__(self, username, password):
        """Create the launcher and obtain the URL list, driver and display."""
        self.launcher = Launcher(username, password)
        self.es = Es_fb()
        self.list = []
        self.share_list, self.driver, self.display = self.launcher.get_share_list(
        )
        self.update_time = int(time.time())

    @staticmethod
    def _first_value(node, xpaths, extract, default='None'):
        """Return ``extract(element)`` for the first XPath that works.

        A variant counts as failed when either the lookup or ``extract``
        raises; ``default`` is returned once every variant has failed.
        """
        for xpath in xpaths:
            try:
                return extract(node.find_element_by_xpath(xpath))
            except Exception:
                continue
        return default

    def _parse_entry(self, entry):
        """Build the record dictionary for one feed entry."""
        read = self._first_value
        author_name = read(entry, (self._AUTHOR_LINK_XPATH,),
                           lambda link: link.text)
        author_id = read(
            entry, (self._AUTHOR_LINK_XPATH,),
            lambda link: ''.join(
                re.findall(r'id=(\d+)', link.get_attribute('data-hovercard'))))
        pic_url = read(entry, (self._AVATAR_XPATH,),
                       lambda img: img.get_attribute('src'))
        content = read(entry, (self._CONTENT_XPATH,),
                       lambda node: node.text)
        timestamp = read(entry, self._TIME_XPATHS,
                         lambda abbr: int(abbr.get_attribute('data-utime')))
        mid = read(
            entry, (self._PERMALINK_XPATH,),
            lambda link: ''.join(
                re.findall(r'/(\d+)', link.get_attribute('href'))))
        root_mid = read(
            entry, (self._ROOT_LINK_XPATH,),
            lambda link: ''.join(
                re.findall(r'story_fbid=(\d+)', link.get_attribute('href'))))
        return {
            'uid': author_id,
            'photo_url': pic_url,
            'nick_name': author_name,
            'mid': mid,
            'timestamp': timestamp,
            'text': content,
            'update_time': self.update_time,
            # 'root_text' carries the same value as 'text'.
            'root_text': content,
            'root_mid': root_mid
        }

    def get_share(self):
        """Visit every URL in ``self.share_list`` and collect the feed entries.

        Records are appended to ``self.list``, which is also returned.  On
        exit the driver is quit and the display process is killed; the kill
        still happens when quitting the driver raises.
        """
        try:
            for url in self.share_list:
                self.driver.get(url)
                time.sleep(1)
                # Dismiss the notification pop-up to get to the page itself.
                try:
                    self.driver.find_element_by_xpath(
                        self._OVERLAY_XPATH).click()
                except Exception:
                    pass

                for group in self.driver.find_elements_by_xpath(
                        '//div[@role="feed"]/div'):
                    for entry in group.find_elements_by_xpath('./div'):
                        self.list.append(self._parse_entry(entry))
        finally:
            try:
                self.driver.quit()
            finally:
                self.display.popen.kill()
        return self.list

    def save(self, indexName, typeName, list):
        """Forward ``list`` to ``self.es.executeES`` under the given names."""
        self.es.executeES(indexName, typeName, list)