def get_files(self, url, dest_dir='/Users/samuel/Documents/python'):
    """Download every video linked from a YouTube playlist page as mp4.

    Args:
        url: Page address. It is processed only when it contains
            'youtube.com'; any other address is ignored without output.
        dest_dir: Directory passed to ``self.download_now`` for each video.
            Defaults to the path that used to be hard-coded here.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Membership test instead of ``find(...) > 0``: find() returns 0 when the
    # address starts with the host name (no scheme), which the old comparison
    # wrongly treated as "not found".
    if 'youtube.com' not in url:
        return

    invalid_msg = 'link de youtube no hace referencia a un archivo valido'

    d = Download(url)
    containers = d.get_tags(
        'div', 'class',
        'playlist-videos-container yt-scrollbar-dark yt-scrollbar')
    # The 'error' string is the failure marker this method already checks on
    # get_tags_html below; presumably get_tags reports failure the same way
    # (verify against Download). The emptiness check keeps the [0] lookup
    # from raising IndexError when no playlist container is on the page.
    if containers == 'error' or not containers:
        print(invalid_msg)
        return

    videos = d.get_tags_html(
        str(containers[0]), 'a', 'class', 'yt-uix-sessionlink')
    if videos == 'error':
        print(invalid_msg)
        return

    links = d.get_urls('youtube', videos, 'href')
    prefix = 'https://www.youtube.com'
    for href in links:
        # Keep only the text before the first '&' of the absolute address.
        video_url = (prefix + href).split('&')[0]
        self.download_now(str(video_url), 'mp4', dest_dir)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\n')
    print('se descargo la lista el archivo')
def get_file(self, url, output_path='slideshare.pdf'):
    """Build a PDF from the slide images of a SlideShare page.

    Args:
        url: Page address. It must contain 'slideshare.net'; otherwise a
            message is printed and nothing else happens.
        output_path: File name passed to ``pdf.output``. Defaults to
            'slideshare.pdf', the name that used to be hard-coded here.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Membership test instead of ``find(...) > 0``: find() returns 0 when the
    # address starts with the host name (no scheme), which the old comparison
    # wrongly treated as "not found".
    if 'slideshare.net' not in url:
        print('link de slideshare no es valido')
        return

    d = Download(url)
    imgs = d.get_tags('img', 'class', 'slide_image')
    if imgs == 'error':
        print('link de slideshare no hace referencia a un archivo valido')
        return

    links = d.get_urls('slideshare', imgs, 'data-full')
    pdf = FPDF('P', 'pt', 'Letter')

    # One formatted string reproduces the old comma-separated print output
    # and behaves the same on Python 2 and 3.
    print('\nDownload images [ %d ]:\n' % len(links))
    for slide_url in links:
        # The query string is dropped before downloading. urlretrieve is
        # called without a target name; element 0 of its result is the local
        # path of the downloaded copy.
        # NOTE(review): these downloaded copies are never removed here.
        file_path = urlretrieve(slide_url.split('?')[0])[0]
        print('  %s' % slide_url)
        img = Image.open(file_path)
        # Each page takes the size of its own image, which is then placed at
        # the page origin.
        pdf.add_page(format=img.size)
        pdf.image(file_path, x=0, y=0)

    pdf.output(output_path, 'F')
    print('se genero en archivo %s' % output_path)