Example 1
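Given the URL of a YouTube playlist page, this method scrapes the playlist sidebar with the project's Download helper, extracts each video link, and hands every video to download_now to be saved as an MP4.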
	def get_files(self, url):
		# Only handle URLs that actually contain 'youtube.com' after the scheme.
		if url.find('youtube.com') > 0:
			d = Download(url)
			# Grab the playlist sidebar container and every video anchor inside it.
			div = d.get_tags('div', 'class', 'playlist-videos-container yt-scrollbar-dark yt-scrollbar')
			videos = d.get_tags_html(str(div[0]), 'a', 'class', 'yt-uix-sessionlink')
			# The Download helper signals failure by returning the string 'error'.
			if videos != 'error':
				links = d.get_urls('youtube', videos, 'href')
				video_url_prefix = 'https://www.youtube.com'
				for valor in links:
					# Strip the playlist parameters so only the plain watch URL remains.
					valor = (video_url_prefix + valor).split('&')[0]
					self.download_now(str(valor), 'mp4', '/Users/samuel/Documents/python')
					print '\n'
				print 'the playlist was downloaded'
			else: print 'the youtube link does not reference a valid file'
Example 2
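An interactive Wikipedia search loop: it builds a search URL from the query, prints a numbered list of results for the current page, lets the user page forward and back with n/p, and prints the body of the selected article.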
	def get_file(self, search, limite, page):
		searching = True
		self.page = page
		self.search = search
		while searching:
			# Build the Wikipedia search URL for the current results page.
			url = 'https://en.wikipedia.org/w/index.php?limit='+limite+'&offset='+self.page+'&search='+((self.search).replace(' ', '+'))
			print url
			d = Download(url)
			chapters = d.get_tags('ul', 'class', 'mw-search-results')
			title = d.get_tags_file(str(chapters[0]), 'div', 'class', 'mw-search-result-heading')
			desc = d.get_tags_file(str(chapters[0]), 'div', 'class', 'searchresult')
			link = []
			# Print every result on this page with its number, title and snippet.
			for index in range(len(title)):
				link.append(d.get_url_wikipedia(title[index], 'a', 'href'))
				text_title = d.get_tags_text(str(title[index]))
				text_desc = d.get_tags_text(str(desc[index]))
				print '\033[94m[', (index+1), '] ', text_title, '\033[0m\n', text_desc, '\n'
			option = raw_input("enter the number of a result\npress n to go to the next page\npress p to go to the previous page\n")
			if option == 'n':
				self.page = str(int(float(self.page)) + int(float(limite)))
				print '\n\n'
			# Only page back when the previous offset would still be non-negative.
			if option == 'p' and int(float(self.page)) >= int(float(limite)):
				self.page = str(int(float(self.page)) - int(float(limite)))
				print '\n\n'
			if option.isdigit():
				if 0 < int(option) <= len(link):
					# Fetch the selected article and print its body text.
					a = 'https://en.wikipedia.org'+str(link[int(option)-1][0])
					v = Download(a)
					div = v.get_tags('div', 'id', 'bodyContent')
					text_page = v.get_tags_text(str(div[0]))
					print '\n', a, '\n', '\033[97m'+text_page+'\033[0m\n'
					searching = False
Example 3
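Given a SlideShare presentation URL, this method collects the full-size slide images, downloads each one, and assembles them into a single slideshare.pdf with FPDF.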
	def get_file(self, url):
		# Requires: from fpdf import FPDF; from PIL import Image; from urllib import urlretrieve
		# Only handle URLs that actually contain 'slideshare.net' after the scheme.
		if url.find('slideshare.net') > 0:
			d = Download(url)
			imgs = d.get_tags('img', 'class', 'slide_image')
			if imgs != 'error':
				# The full-resolution slide images live in the 'data-full' attribute.
				links = d.get_urls('slideshare', imgs, 'data-full')
				pdf = FPDF('P', 'pt', 'Letter')
				print "\nDownloading images [", len(links), "]:\n"
				for pagina_pdf in links:
					# urlretrieve returns (local_path, headers); keep the cached file path.
					file_path = urlretrieve(pagina_pdf.split('?')[0])[0]
					print " ", pagina_pdf
					img = Image.open(file_path)
					# Add one PDF page per slide, sized to the image dimensions.
					pdf.add_page(format=img.size)
					pdf.image(file_path, x=0, y=0)
				pdf.output('slideshare.pdf', 'F')
				print 'the file slideshare.pdf was generated'
			else: print 'the slideshare link does not reference a valid file'
		else: print 'the slideshare link is not valid'
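Below is a minimal sketch of how the SlideShare example above might be driven. The class name Slideshare and its no-argument constructor are assumptions (only get_file(url) appears in the example), and the URL is a placeholder.

if __name__ == '__main__':
	# 'Slideshare' is an assumed class name; only its get_file(url) method
	# is shown in the example above. The URL below is a placeholder.
	s = Slideshare()
	s.get_file('https://www.slideshare.net/someuser/some-presentation')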