Ejemplo n.º 1
0
def collect(url):
	"""Walk a paginated post listing and store the permalink of every post.

	Starting at ``url``, each page is fetched and parsed. The ``href`` of
	every "Full Story" anchor is printed and then stored through
	``Permalink.create(slug=...)``. The crawl then follows the page's
	"See More Posts" link, whose href is site-relative and is appended to
	``https://mbasic.facebook.com``.

	The crawl ends when a page has no "See More Posts" link, or when that
	link equals the hard-coded stop URL ``finURL``. Posts on the page that
	links to ``finURL`` are printed but not stored, as before.

	Args:
		url: Absolute URL of the first listing page to fetch.

	Returns:
		None. Results are printed and stored via ``Permalink``.
	"""
	baseURL = "https://mbasic.facebook.com"
	# Pagination link at which the crawl is meant to stop.
	finURL = "/groups/625191517538301?bacr=1388816159%3A633308973393222&refid=18"

	# Loop instead of recursing once per page. The previous version recursed
	# unconditionally, so the finURL check never stopped the crawl; it could
	# only end with an IndexError on the page lacking a "See More Posts" link
	# (whose posts were then lost) or by exhausting the recursion limit.
	while True:
		# NOTE(review): verify=False disables TLS certificate verification
		# while session cookies are being sent - confirm this is intentional.
		r = requests.get(url, cookies=cookie, verify=False)
		tree = html.fromstring(r.content)
		postURI = tree.xpath('.//a[text()="Full Story"]')
		nextPage = tree.xpath('.//a/span[text()="See More Posts"]')

		for x in postURI:
			print(x.xpath('@href'))

		# The matched node is the <span> inside the anchor, so the href is
		# read from its parent. A missing link means this is the last page.
		nextHref = nextPage[0].getparent().xpath('@href') if nextPage else []
		curUrl = nextHref[0] if nextHref else None

		if curUrl != finURL:
			for x in postURI:
				hrefs = x.xpath('@href')
				if not hrefs:
					# Anchor without an href: nothing to store.
					continue
				newLink = Permalink.create(slug=hrefs[0])
				newLink.save()

		if curUrl is None:
			return

		print("")
		print(curUrl)
		print("")

		if curUrl == finURL:
			return
		url = baseURL + curUrl
Ejemplo n.º 2
0
					comment = x.xpath('.//div[1]')[0]
					c = etree.tostring(comment)
					try:
						like = x.xpath('.//a[@aria-label="Like"]/text()')[0]
					except IndexError:
						like = str(0)
					timestamp = x.xpath('.//abbr/text()')[0]
					# commentt = Comment(potato=pot,
					# 				   helper=name,
					# 				   helper_slug=link,
					# 				   power=like,
					# 				   timestamp=timestamp,
					# 				   answer=c)
					# commentt.save()
					print c
					# print name, link, like, timestamp, c
					# print etree.tostring(x, pretty_print=True)
			except:
				pass

		# print etree.tostring(description, pretty_print=True)
		# print etree.tostring(base, pretty_print=True)
	except:
		pass

if __name__ == "__main__":
	# Earlier one-off invocations, kept for reference:
	# collect("https://mbasic.facebook.com/groups/625191517538301")
	# scrape("https://m.facebook.com/groups/625191517538301?view=permalink&id=973219502735499&refid=18&_ft_=qid.6204099145172029259%3Amf_story_key.973219502735499%3Atl_objid.973219502735499#footer_action_list")

	# Scrape every stored permalink, echoing its position and slug first.
	for position, permalink in enumerate(Permalink.select()):
		print("%s %s" % (position, permalink.slug))
		scrape("https://mbasic.facebook.com" + permalink.slug)