def get_files(self, url):
    position_link = 0
    if url.find('youtube.com') > position_link:
        d = Download(url)
        # Grab the playlist container, then every video anchor inside it
        div = d.get_tags('div', 'class', 'playlist-videos-container yt-scrollbar-dark yt-scrollbar')
        indice_div = 0
        videos = d.get_tags_html(str(div[indice_div]), 'a', 'class', 'yt-uix-sessionlink')
        # print videos[:]
        if videos != 'error':
            links = d.get_urls('youtube', videos, 'href')
            video_url_preffix = 'https://www.youtube.com'
            for valor in links:
                # Build the absolute video URL and drop any extra query parameters
                valor = (video_url_preffix + valor).split('&')[0]
                self.download_now(str(valor), 'mp4', '/Users/samuel/Documents/python')
                print '\n'
            print 'the playlist was downloaded'
        else:
            print 'the youtube link does not point to a valid file'
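# Usage sketch (assumptions: the method above lives on a class that also defines
# download_now; the class name "Youtube" below is hypothetical, not from this excerpt):
#   yt = Youtube()
#   yt.get_files('https://www.youtube.com/playlist?list=PL...')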
def get_file(self, search, limite, page):
    bb = True
    self.page = page
    self.search = search
    while bb == True:
        # Build the Wikipedia search URL for the current offset
        url = 'https://en.wikipedia.org/w/index.php?limit=' + limite + '&offset=' + self.page + '&search=' + ((self.search).replace(' ', '+'))
        print url
        d = Download(url)
        chapters = d.get_tags('ul', 'class', 'mw-search-results')
        title = d.get_tags_file(str(chapters[0]), 'div', 'class', 'mw-search-result-heading')
        desc = d.get_tags_file(str(chapters[0]), 'div', 'class', 'searchresult')
        link = []
        for index in range(len(title)):
            link.append(d.get_url_wikipedia(title[index], 'a', 'href'))
            text_title = d.get_tags_text(str(title[index]))
            text_desc = d.get_tags_text(str(desc[index]))
            # print link
            print '\033[94m[', (index + 1), '] ', text_title, '\033[0m\n', text_desc, '\n'
        option = raw_input("enter the number of a result\npress n to go to the next page\npress p to go to the previous page\n")
        if option == 'n':
            self.page = str(int(float(self.page)) + int(float(limite)))
            print '\n\n'
        # Only go back if the previous page offset would not be negative
        if option == 'p' and int(float(self.page)) - int(float(limite)) >= 0:
            self.page = str(int(float(self.page)) - int(float(limite)))
            print '\n\n'
        if option.isdigit():
            if int(float(option)) > 0 and int(float(option)) <= (index + 1):
                # Fetch and print the body of the selected article
                a = 'https://en.wikipedia.org' + str(link[int(float(option)) - 1][0])
                v = Download(a)
                div = v.get_tags('div', 'id', 'bodyContent')
                text_page = v.get_tags_text(str(div[0]))
                print '\n', a, '\n', '\033[97m' + text_page + '\033[0m\n'
                bb = False
        # Reset the per-page buffers before the next iteration
        chapters = []
        title = []
        desc = []
        text_title = []
        text_desc = []
        url = ''
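# Usage sketch (assumptions: this get_file belongs to a Wikipedia search class, here
# hypothetically called "Wikipedia"; limite and page are strings because the method
# concatenates them straight into the query URL):
#   wiki = Wikipedia()
#   wiki.get_file('python language', '20', '0')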
def get_file(self, url):
    # Requires at module level: from urllib import urlretrieve; from PIL import Image; from fpdf import FPDF
    position_link = 0
    if url.find('slideshare.net') > position_link:
        d = Download(url)
        imgs = d.get_tags('img', 'class', 'slide_image')
        if imgs != 'error':
            # Full-resolution slide images are exposed in the data-full attribute
            links = d.get_urls('slideshare', imgs, 'data-full')
            pdf = FPDF('P', 'pt', 'Letter')
            index_cache = 0
            index_link = 0
            print "\nDownload images [", len(links), "]:\n"
            for pagina_pdf in links:
                # Download each slide to a temporary file, stripping the query string
                file_path = urlretrieve(pagina_pdf.split('?')[index_link])[index_cache]
                print " ", pagina_pdf
                img = Image.open(file_path)
                # One PDF page per slide, sized to match the image
                pdf.add_page(format=img.size)
                pdf.image(file_path, x=0, y=0)
            pdf.output('slideshare.pdf', 'F')
            print 'the file slideshare.pdf was generated'
        else:
            print 'the slideshare link does not point to a valid file'
    else:
        print 'the slideshare link is not valid'
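# Usage sketch (assumptions: this get_file belongs to a SlideShare class, hypothetically
# called "Slideshare"; it writes slideshare.pdf to the current working directory):
#   ss = Slideshare()
#   ss.get_file('https://www.slideshare.net/<user>/<deck>')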