def test_download_to_download_folder(self):
    os.remove('settings.json')
    self.settings = sad.Settings(download_folder='Downloaded_Test')
    self.downloader = sad.Downloader()
    self.downloader.run()
    self.assertEqual(2, len(os.listdir('Downloaded_Test')))
    shutil.rmtree('Downloaded_Test')
def main():
    writer = csv.writer(open('countries.csv', 'w'))
    D = downloader.Downloader()
    html = D('http://example.webscraping.com/ajax/search.json?page=0&page_size=1000&search_term=.')
    ajax = json.loads(html)
    for record in ajax['records']:
        writer.writerow([record['country']])
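# The snippets here call Downloader instances directly, e.g. html = D(url), so the class
# behaves like a callable that fetches a URL and returns the page body. Below is a minimal,
# hypothetical sketch of such a callable downloader, assuming a requests-based
# implementation; the actual downloader.Downloader used by these projects may differ.
import requests


class CallableDownloader(object):
    def __init__(self, headers=None, num_retries=2, timeout=30):
        self.headers = headers or {}
        self.num_retries = num_retries
        self.timeout = timeout

    def __call__(self, url):
        # Retry on network errors and 5xx responses; return the body text or None.
        for _ in range(self.num_retries + 1):
            try:
                resp = requests.get(url, headers=self.headers, timeout=self.timeout)
            except requests.RequestException:
                continue
            if resp.status_code < 500:
                return resp.text
        return None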
def main(url_to_m3u8, download_dir, verbose, ignore_ssl):
    """
    :type url_to_m3u8: str
    :type download_dir: str
    :type verbose: bool
    :type ignore_ssl: bool
    :rtype: None
    """
    http_settings = dict(
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6)"
                          " AppleWebKit/602.4.8 (KHTML, like Gecko)"
                          " Version/10.0.3 Safari/602.4.8"
        },
    )
    if ignore_ssl:
        http_settings["verify"] = False

    global DOWNLOADER
    DOWNLOADER = downloader.Downloader(
        download_dir=download_dir,
        http_settings=http_settings,
    )

    logging.basicConfig(level=logging.INFO if verbose else logging.WARNING)
    process_main_playlist(url_to_m3u8)
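# A hedged sketch of how the main() above might be exposed on the command line, assuming
# a click-based entry point; the option names here are illustrative, not necessarily the
# project's actual CLI.
import click


@click.command()
@click.argument("url_to_m3u8")
@click.option("--download-dir", default=".", help="Directory to write downloaded segments into.")
@click.option("--verbose", is_flag=True, help="Enable INFO-level logging.")
@click.option("--ignore-ssl", is_flag=True, help="Skip TLS certificate verification.")
def cli(url_to_m3u8, download_dir, verbose, ignore_ssl):
    main(url_to_m3u8, download_dir, verbose, ignore_ssl)


if __name__ == "__main__":
    cli()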
def main(self):
    """Main program that converts the prime-minister CSV file into an HTML page."""
    # Create a PrimeMinisters directory under the Desktop directory
    # directly below the home directory.
    # If it already exists, remove it and create it again;
    # if it does not exist, simply create it.
    home_directory = os.environ['HOME']
    base_directory = home_directory + '/Desktop/PrimeMinisters/'
    if os.path.isdir(base_directory):
        shutil.rmtree(base_directory)
    os.makedirs(base_directory)

    # Have the downloader fetch all the required files
    # and obtain the table that serves as input.
    a_downloader = downloader.Downloader(base_directory)
    a_table = a_downloader.download_all()
    print a_table

    # Hand the input table to the translator for conversion
    # and obtain the table that serves as output.
    a_translator = translator.Translator(a_table)
    a_table = a_translator.table()
    print a_table

    # Hand the output table to the writer
    # and have it create the web page.
    a_writer = writer.Writer(base_directory, a_table)
    a_writer.write()

    return 0
def __init__(self):
    # Set up the helpers first so self.u is available for the OS check below.
    self.dl = downloader.Downloader()
    self.r = run.Run()
    self.u = utils.Utils()
    # Check the OS
    if not str(sys.platform) == "darwin":
        self.u.head("Incompatible System")
        print(" ")
        print("This script can only be run from macOS/OS X.")
        print(" ")
        print("The current running system is \"{}\".".format(sys.platform))
        print(" ")
        self.grab("Press [enter] to quit...")
        print(" ")
        exit(1)
    self.web_drivers = None
    self.os_build_number = None
    self.os_number = None
    self.wd_loc = None
    self.sip_checked = False
    self.installed_version = "Not Installed!"
    self.get_manifest()
    self.get_system_info()
def test_get_next_season(self):
    self.wl.update_watchlist('The.Big.Bang.Theory', 'S01E30')
    self.downloader = sad.Downloader()
    self.downloader.run()
    folder = os.listdir(os.getcwd())
    downloaded_list = [f for f in folder if '.torrent' in f]
    self.assertEqual(2, len(downloaded_list))
    self.assertIn('The.Big.Bang.Theory.S02E01.torrent', downloaded_list)
def execute_server_command(self, server_message):
    validation_error = self.validate_command(server_message)
    if validation_error:
        if validation_error['number'] is not None:
            return validation_error
    error = server_message.get('error')
    if error:
        self.logger.warning("@ Server returned error: %s\t%s" % (error.get('code'), error.get('message')))
    command, number = server_message.get('command'), server_message.get('number')
    if command:
        if self.errors:
            self.logger.error("! Server returned a command for a request containing an error - this is an error.")
            return {"number": number, "result": False}
        self.logger.info("Executing command number %i : %s" % (number, str(command)))
        method = getattr(self, command, None)
        if not method:
            method = getattr(self.printer, command)
        payload = server_message.get('payload')
        if server_message.get('is_link'):
            if self.downloader and self.downloader.is_alive():
                self.register_error(108, "Can't start new download, because previous download isn't finished.")
                result = False
            else:
                if command == 'gcodes':
                    self.printer.set_filename(server_message.get('filename'))
                self.downloader = downloader.Downloader(
                    self, payload, method, is_zip=bool(server_message.get('zip')))
                self.downloader.start()
                result = True
        else:
            if payload:
                arguments = [payload]
            else:
                arguments = []
            try:
                result = method(*arguments)
                # To reduce needless 'return True' in methods, a return of None is treated as a successful call.
                result = result or result is None
            except Exception as e:
                message = "! Error while executing command %s, number %d.\t%s" % (command, number, e.message)
                self.register_error(109, message, is_blocking=False)
                self.logger.exception(message)
                result = False
        ack = {"number": number, "result": result}
        return ack
def __init__(self):
    super().__init__()
    self.state = {
        "current_url": None
    }
    self.settings = {
        "geometry": "600x250+400+300",
        "treeview": [
            ["Uploader", 100],
            ["Title", 190],
            ["Progress", 70],
            ["ETA (s)", 50],
            ["Speed", 70]
        ]
    }
    try:
        with open("settings.json") as fp:
            self.settings.update(json.load(fp))
    except FileNotFoundError:
        with open("settings.json", "w") as fp:
            json.dump(self.settings, fp)
    self.title("Video Downloader")
    self.attributes("-topmost", True)
    self.geometry(self.settings["geometry"])
    self.columnconfigure(0, weight=1)
    self.rowconfigure(1, weight=1)
    self.minsize(600, 250)
    self.preview_frame = PreviewFrame(self)
    self.preview_frame.grid(padx=5, pady=5, sticky="nwe")
    self.tv = Tree(self, self.settings["treeview"])
    self.tv.grid(column=0, row=1, padx=5, pady=5, sticky="nswe")
    self.bottom_frame = BottomFrame(self)
    self.bottom_frame.grid(column=0, row=2, sticky="w")
    self.menu = PopupMenu(self, "cut", "copy", "paste")
    self.tv_menu = PopupMenu(self, "cancel", "pause", "download_speed")
    self.pv_thread = downloader.Preview(self.callback)
    self.dl_thread = downloader.Downloader(self.callback)
    self.cv_thread = downloader.Converter(self.callback)
    try:
        self.pv_thread.add(self.clipboard_get())
    except TclError:
        pass
    self.bind("<Button-3>", self.popup)
    self.after(100, self.check_clipboard)
    self.protocol("WM_DELETE_WINDOW", self.end)
    self.mainloop()
def subpaer_urls(seed_url):
    # Fetch the seed page through the cached Downloader; the instance must be
    # called with the URL to obtain the page content.
    html = downloader.Downloader(cache=pageShe)(seed_url)
    urls = [
        re.findall(r'href="(.*)">', line)[0]
        for line in re.findall(r'<div class="List2">(.*?)</div>', html, re.DOTALL)[0].split('\n')
        if re.search(r'href', line)
    ]
    return [urljoin(seed_url, url) for url in urls]
def __init__(self, url, count):
    # Initialize the download URL
    self.url = url
    # Initialize the total number of records to crawl
    self.count = count
    # Initialize the downloader
    self.downloader = downloader.Downloader()
    # Initialize the outputer
    self.outputer = outputer.Outputer()
def _parser_html_(self, content, filename):
    try:
        if not os.path.exists(filename):
            with open(filename, 'w+') as fp:
                fp.write(content.encode('utf-8'))
    except Exception as e:
        self.log.error(e)
        downloader.Downloader()._logout_banker()
def __handle_upload_idb_start(self, client, idb_name, idb_hash, idb_size):
    self.__logger.info(
        "Client {} sent an upload request, starting the upload of file {}, size {}"
        .format(client.addr, idb_name, idb_size))
    local_file_path = os.path.join(self.__idbs_path, idb_name)
    client.downloader = downloader.Downloader(
        self.__logger, local_file_path, idb_name, idb_hash, idb_size)
def saveIntoMyHbase(self, link, soup=None):
    """
    Save the info from MySQL into HBase; the HBase row uses the same 'id'.
    :param link: the link to save
    :param soup: the soup of the link; if not given, the link will be downloaded first
    :return:
    """
    sql = "url = '%s'" % (link)
    sql = "select *from %s where %s" % (MySQL_TABLE, sql)
    sql_data = self.mdb.selectSQL(MySQL_TABLE, sql)[0]
    # hbase_save = sql_data["hbase"]
    # hbase_save = hbase_save if isinstance(hbase_save, str) else hbase_save.encode('utf-8')
    hbase_save = DEFUALT_FALSE
    if hbase_save == DEFUALT_TRUE:
        # already saved into hbase, nothing to do
        return
    elif hbase_save == DEFUALT_FALSE:
        # not saved into hbase yet, so save it now
        download = downloader.Downloader()
        soup = soup if soup else BeautifulSoup(download(link), htmlparser)
        urls = getInnerPageURLs(soup)[0][0]
        url = "URL:%s%s" % (mainHTTP, urls.encode('utf-8'))
        dhtml = download(url)
        hbase_save = DEFUALT_TRUE if dhtml else DEFUALT_FALSE
        dsoup = BeautifulSoup(dhtml, htmlparser)
        saveTxt = ""
        for string in dsoup.stripped_strings:
            dammit = UnicodeDammit(string)
            saveTxt = saveTxt + "%s\n" % dammit.unicode_markup.encode('utf-8')
        # print saveTxt
        # self.hdb = hbase_mar.HbaseClient()
        # if HBASE_TABLE not in self.hdb.get_tables():
        #     self.hdb.create_table(HBASE_TABLE, "page")
        id = "%d" % sql_data['id']
        for key in sql_data.keys():
            if key == 'id':
                continue
            v = sql_data[key] if isinstance(sql_data[key], str) else sql_data[key].encode('utf-8')
            self.hdb.put(HBASE_TABLE, "%s" % id, {"page:%s" % (key): "%s" % v})
        else:
            # for-else: after all columns are written, store the page text as well
            self.hdb.put(HBASE_TABLE, "%s" % id, {"page:data": "%s" % (saveTxt)})
        # show the info about the put
        # self.hdb.ggetrow(HBASE_TABLE, "%d" % sql_data['id'])
        print "%s saved into hbase" % url
def handleService(self):
    params = self.parser.getParams()
    name = self.parser.getParam(params, "name")
    title = self.parser.getParam(params, "title")
    category = self.parser.getParam(params, "category")
    page = self.parser.getParam(params, "page")
    url = self.parser.getParam(params, "url")
    vtitle = self.parser.getParam(params, "vtitle")
    service = self.parser.getParam(params, "service")
    action = self.parser.getParam(params, "action")
    path = self.parser.getParam(params, "path")

    if name == None:
        self.listsABCMenu(self.cm.makeABCList())
    if name == 'abc-menu':
        self.showSerialTitles(category)
    elif name == 'serial-title':
        self.showSeason(page, category)
    elif name == 'serial-season' and title != None and page != None:
        self.showSerialParts(page, title, category)

    if name == 'playSelectedMovie':
        nUrl = mainUrl + page
        linkVideo = ''
        ID = ''
        ID = self.getVideoID(nUrl)
        #print str(ID)
        if (ID != False):
            if ID != '':
                linkVideo = self.up.getVideoLink(ID)
            if linkVideo != False:
                self.LOAD_AND_PLAY_VIDEO(linkVideo, title)
            else:
                d = xbmcgui.Dialog()
                d.ok('Brak linku', SERVICE + ' - tymczasowo wyczerpałeś limit ilości uruchamianych seriali.', 'Zapraszamy za godzinę.')

    if service == SERVICE and action == 'download' and url != '':
        self.cm.checkDir(os.path.join(dstpath, SERVICE))
        if dbg == 'true':
            log.info(SERVICE + ' - handleService()[download][0] -> title: ' + urllib.unquote_plus(vtitle))
            log.info(SERVICE + ' - handleService()[download][0] -> url: ' + urllib.unquote_plus(url))
            log.info(SERVICE + ' - handleService()[download][0] -> path: ' + path)
        if urllib.unquote_plus(url).startswith('/'):
            urlTempVideo = self.getVideoID(mainUrl + urllib.unquote_plus(url))
            linkVideo = self.up.getVideoLink(urlTempVideo)
            if dbg == 'true':
                log.info(SERVICE + ' - handleService()[download][1] -> title: ' + urllib.unquote_plus(vtitle))
                log.info(SERVICE + ' - handleService()[download][1] -> temp url: ' + urlTempVideo)
                log.info(SERVICE + ' - handleService()[download][1] -> url: ' + linkVideo)
            if linkVideo != False:
                if dbg == 'true':
                    log.info(SERVICE + ' - handleService()[download][2] -> title: ' + urllib.unquote_plus(vtitle))
                    log.info(SERVICE + ' - handleService()[download][2] -> url: ' + linkVideo)
                    log.info(SERVICE + ' - handleService()[download][2] -> path: ' + path)
                dwnl = downloader.Downloader()
                dwnl.getFile({
                    'title': urllib.unquote_plus(vtitle),
                    'url': linkVideo,
                    'path': path
                })
def setUp(self):
    test_dir = os.listdir(os.getcwd())
    if 'settings.json' in test_dir:
        os.rename('settings.json', 'settings.json.bkp')
    if 'watchlist.json' in test_dir:
        os.rename('watchlist.json', 'watchlist.json.bkp')
    self.settings = sad.Settings()
    self.wl = sad.Watchlist(
        series_list=['Breaking.Bad', 'The.Big.Bang.Theory'])
    self.downloader = sad.Downloader()
def __init__(self, **kwargs):
    self.dl = downloader.Downloader()
    self.r = run.Run()
    self.iasl_url = "https://bitbucket.org/RehabMan/acpica/downloads/iasl.zip"
    self.iasl = self.check_iasl()
    if not self.iasl:
        return None
    self.dsdt = None
    self.dsdt_raw = None
    self.dsdt_lines = None
def __init__(self, master=None):
    super().__init__(master)
    self.master = master
    self.master.geometry("500x250")
    self.grid()
    self.row = 0
    self.quality = tk.StringVar(self, value="480p")
    self.isAudio = tk.BooleanVar(self)
    self.descargador = downloader.Downloader(
        pathlib.Path().absolute().joinpath('descargas'))
    self.create_widgets()
def gcodes(self, gcodes_or_link, is_link=False):
    if is_link:
        if self.is_downloading():
            self.logger.error('Download command received while a download is already in progress. Aborting...')
            return False
        else:
            self.downloader = downloader.Downloader(self, gcodes_or_link)
            self.downloader.start()
    else:
        gcodes = base64.b64decode(gcodes_or_link)
        self.unbuffered_gcodes(gcodes)
def test_fetched_download_returns_the_searched_tv_series(self):
    '''When the downloader searches for a TV series, it must guarantee
    that the series is returned by the fetcher.'''
    os.remove('watchlist.json')
    name = 'The.100'
    self.wl = sad.Watchlist(series_list=[name])
    self.wl.update_watchlist(name, 'S03E00')
    self.downloader = sad.Downloader()
    self.downloader.action = 'show_magnets'
    magnet_list = self.downloader.run()
    self.assertIn(name.replace('.', '+').lower(), magnet_list)
def __init__(self, spider, node_manager, schedule):
    self.settings = node_manager.settings
    self.node_manager = node_manager
    self.status = EngineStatusClient()
    self.stats = load_object(self.settings['STATS_CLASS'])(self)
    self.spider = spider
    self.scheduler = schedule
    self.signals = SignalManager(self)
    self.downloader = downloader.Downloader(self)
    self.extension_manager = ExtensionManager(self)
    self.scraper = scrapy.Scraper(self, spider)
def main(url_to_m3u8, download_dir, verbose):
    """
    :type url_to_m3u8: str
    :type download_dir: str
    :type verbose: bool
    :rtype: None
    """
    global DOWNLOADER
    DOWNLOADER = downloader.Downloader(download_dir=download_dir)

    logging.basicConfig(level=logging.INFO if verbose else logging.WARNING)
    process_main_playlist(url_to_m3u8)
def download(self):
    if self.cache and self.seed_url in self.cache:
        self.html = self.cache[self.seed_url]
        print "Get", self.seed_url, "from cache..."
    else:
        self.html = downloader.Downloader()(self.seed_url)
        self.cache[self.seed_url] = self.html
        print "Download", self.seed_url, "..."
def __init__(self, **kwargs):
    self.dl = downloader.Downloader()
    self.r = run.Run()
    self.u = utils.Utils("SSDT Time")
    self.iasl_url_macOS = "https://bitbucket.org/RehabMan/acpica/downloads/iasl.zip"
    self.iasl_url_linux = "http://amdosx.kellynet.nl/iasl.zip"
    self.iasl_url_windows = "https://acpica.org/sites/acpica/files/iasl-win-20200528.zip"
    self.iasl = self.check_iasl()
    if not self.iasl:
        raise Exception("Could not locate or download iasl!")
    self.dsdt = None
    self.dsdt_raw = None
    self.dsdt_lines = None
def target_depot():
    max_urls = 5
    D = downloader.Downloader()
    zipped_data = D('http://www.luckyegg.net/places/top-1mcsv.zip')
    urls = []  # the top-1-million URLs will be stored in this list
    with zipfile.ZipFile(io.BytesIO(zipped_data)) as zf:
        csv_filename = zf.namelist()[0]
        for _, website in csv.reader(io.TextIOWrapper(zf.open(csv_filename))):
            urls.append('http://' + website)
            if len(urls) == max_urls:
                break
            print('Target website:', len(urls), website)
    return urls
def __init__(self):
    self.config = configparser.ConfigParser()
    self.downloader = downloader.Downloader()
    # Build the path with os.path.join so the backslashes don't need manual escaping.
    self.download_location = os.path.join(os.getcwd(), "Downloads") + os.sep
    root = self.root = Gui(title="URL Link Grabber")
    root.set_version("Version 0.2 6-11-2017")
    root.window.protocol("WM_DELETE_WINDOW", self.quit)
    root.button_get_links.config(command=self.get_links)
    root.button_filter.config(command=self.filter)
    root.button_save.config(command=self.save_links)
    root.button_show_in_explorer.config(command=self.open_explorer)
    root.window.bind('<Return>', self.get_links)
def __init__(self):
    self.settings = settings.Settings()
    list.__init__(
        self,
        [
            Channel("BBC TV", "tv", self.settings),
            Channel("ITV", "itv", self.settings),
            # Channel("Channel 4", "ch4", self.settings),
            # Channel("Five", "five", self.settings),
            # Channel("BBC Podcasts", "podcast", self.settings),
            Channel("BBC Radio", "radio", self.settings),
            # Channel("Hulu", "hulu", self.settings),
        ])
    self.downloader = downloader.Downloader(self.settings)
    self.streamer = streamer.Streamer(self.settings)
def __init__(self, url, ext):
    #~ parser = argsparser.ArgsParser()
    #~ args = sys.argv  # takes the arguments from the console
    if (url.startswith("www.")):
        url = "http://" + url
    if (not ext.startswith(".")):
        ext = "." + ext
    if (url == "" or ext == ""):
        print "You must provide a url and an extension with --url= and --ext= ."
    contentDownl = downloader.Downloader()
    count = contentDownl.downloadAll(url, ext)
    print "Downloaded " + str(count) + " files with extension "\
        + ext + " from the url \"" + url + "\""
def _get_result_(self, soup, point):
    try:
        self.log.info('get result')
        tables = soup.find_all('table')
        for table in tables:
            if isinstance(table, bs4.element.Tag):
                for tr in table.find_all('tr'):
                    string = ''
                    if isinstance(tr, bs4.element.Tag):
                        for td in tr.find_all('td'):
                            if isinstance(td, bs4.element.Tag):
                                string = string + ':\t' + td.text
                        print(string)
    except Exception as e:
        self.log.error(e)
        downloader.Downloader()._logout_banker()
def download_file():
    user_id = get_user_id(request.remote_addr)
    print('ip: %s downloading' % (user_id))
    url = request.args.get('url', None)
    if not test_url(url):
        return error_json('Please Enter A Valid YouTube URL')
    print(url)
    if user_id in DOWNLOADS:
        return error_json('Already downloading')
    yt = downloader.Downloader(url, user_id)
    DOWNLOADS[user_id] = yt
    title = yt.title + '.mp4'
    yt.start()
    fp = '%s.mp4' % (user_id)
    _json = {"fp": fp, "title": title}
    return json.dumps(_json)
def link_crawler(
        start_url,
        link_regex,
        delay=5,
        robots_url_suffix="robots.txt",
        user_agent="wswp",
        max_depth=5,
        scrape_callback=None,
        num_retries=3,
):
    seen = {}
    crawler_queue = queue.Queue()
    crawler_queue.put(start_url)
    headers = {"User-Agent": user_agent}
    D = downloader.Downloader(headers)
    protocol, domain, *_ = parse.urlsplit(start_url)
    robots_url = parse.urlunsplit(
        (protocol, domain, robots_url_suffix, "", ""))
    rp = parse_robots(robots_url)
    while not crawler_queue.empty():
        url = crawler_queue.get()
        if rp and not rp.can_fetch(user_agent, url):
            print(f"blocked by robots.txt {url}")
            continue
        html = D(url, num_retries)
        if scrape_callback:
            scrape_callback(url, html)
        depth = seen.get(url, 0)
        if depth == max_depth:
            print(f"reached max depth {url}")
            continue
        for link in get_links(html):
            if link and re.match(link_regex, link):
                abs_link = parse.urljoin(url, link)
                if abs_link not in seen:
                    crawler_queue.put(abs_link)
                    seen[abs_link] = depth + 1
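# Example invocation of the link_crawler above. The start URL, link regex, and callback
# are illustrative placeholders, assuming the same example.webscraping.com site used in
# the earlier snippets.
def print_scraped(url, html):
    # Trivial scrape callback: report which page was fetched and how much HTML came back.
    print(f"scraped {url} ({len(html or '')} characters)")


if __name__ == "__main__":
    link_crawler("http://example.webscraping.com/index",
                 r"/(index|view)/",
                 scrape_callback=print_scraped)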