def fast_download(url, filename):
    chunk_count = 1
    downloader = Downloader(url, filename, chunk_count)
    downloader.start_sync()
    # downloader.start()
    # downloader.subscribe(callback, callback_threshold)
    # downloader.wait_for_finish()
    return

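# The commented-out calls above hint at the non-blocking pattern used in the
# examples that follow. A minimal, hypothetical sketch of that variant is given
# below; the function name, default chunk count, and the progress print-out are
# assumptions, not part of the original snippet. It assumes the same
# `Downloader` import (pget) as the surrounding examples.
def fast_download_with_progress(url, filename, chunk_count=4):
    def on_progress(d):
        # `total_downloaded` is the same attribute the later callbacks read.
        print('%d bytes downloaded' % d.total_downloaded)

    downloader = Downloader(url, filename, chunk_count)
    downloader.start()
    downloader.subscribe(on_progress, 10)  # same (callback, callback_threshold) form as below
    downloader.wait_for_finish()
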
def download_file_parallel(url, target_path, show_progress=False, num_threads=1):
    """
    Download the file from the given `url` and store it at `target_path`.

    Return a tuple x (url, bool, str). x[0] contains the url.
    If download failed x[1] is ``False`` and x[2] contains some error message.
    If download was fine x[1] is ``True`` and x[2] contains the target-path.
    """
    downloader = Downloader(url, target_path, num_threads)
    downloader.start()

    if show_progress:
        # Wait until we know file size
        while downloader.total_length == 0:
            pass

        pbar = tqdm(total=downloader.total_length, desc='Download File',
                    unit_scale=True)

        def update_pbar(x):
            pbar.update(x.total_downloaded - pbar.n)

        downloader.subscribe(update_pbar, 10)

    downloader.wait_for_finish()

    if show_progress:
        pbar.close()

    return (url, True, target_path)

def download_file_parallel(url, target_path, num_threads=1):
    """
    Download the file from the given `url` and store it at `target_path`.

    Return a tuple x (url, bool, str). x[0] contains the url.
    If download failed x[1] is ``False`` and x[2] contains some error message.
    If download was fine x[1] is ``True`` and x[2] contains the target-path.
    """
    downloader = Downloader(url, target_path, num_threads)
    downloader.start()

    # Wait until we know file size
    while downloader.total_length == 0:
        pass

    file_size = downloader.total_length
    logger.info('Download file from "%s" with size: %d B', url, file_size)

    bytes_at_last_log = 0

    def callback(x):
        nonlocal bytes_at_last_log
        if x.total_downloaded - bytes_at_last_log >= PROGRESS_LOGGER_BYTE_DELAY:
            logger.info('Download [%06.2f%%]', x.total_downloaded / file_size * 100)
            bytes_at_last_log = x.total_downloaded

    downloader.subscribe(callback, 10)
    downloader.wait_for_finish()
    logger.info('Finished download')

    return (url, True, target_path)

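# A hypothetical call site for the download_file_parallel variants above,
# illustrating the (url, bool, str) return contract described in their
# docstrings. The URL, target path, and thread count are placeholders.
url, ok, result = download_file_parallel('https://example.com/data.bin',
                                         '/tmp/data.bin', num_threads=4)
if ok:
    print('Saved to ' + result)           # on success the third element is the target path
else:
    print('Download failed: ' + result)   # on failure it holds an error message
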
async def task_job(self, anime, episde_num, url, base_path):
    final_url = url % episde_num
    self.log.debug("Task for %s episode %d", anime, episde_num)
    if not await self.check_ok(final_url):
        self.log.error(
            f"Failed to fetch data of episode {episde_num} of {anime}")
        return
    downloader = Downloader(final_url,
                            join(base_path, f'{episde_num:03}.mp4'),
                            self.chunck_count)
    self.tasks[(anime, episde_num)] = downloader
    downloader.start()
    self.log.debug("Downloading %s episode %d", anime, episde_num)
    await sync_to_async(downloader.wait_for_finish)()
    await self.send_message_async(
        f"The episode {episde_num} of {anime} was downloaded")
    del self.tasks[(anime, episde_num)]

def download_file(url, target_path):
    downloader = Downloader(url, target_path, 8)
    downloader.start()

    while downloader.total_length == 0:
        pass

    pbar = tqdm(total=downloader.total_length, desc='Download File',
                unit_scale=True)

    def update_pbar(x):
        pbar.update(x.total_downloaded - pbar.n)

    downloader.subscribe(update_pbar, 10)
    downloader.wait_for_finish()
    pbar.close()

def d(self, item, vpath, jpath, spider):
    # /usr/local/lib/python3.6/site-packages/pget
    h = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
        'Referer': 'https://www.google.com/'
    }
    # proxies=spider.proxies
    downloader = Downloader(item['url_video'], vpath, chunk_count=0,
                            high_speed=True, headers=h,
                            proxies=spider.proxies)
    downloader.start()
    downloader.wait_for_finish()

    sec = self.check_video(vpath)
    if not sec:
        return

    with open(jpath, 'w+') as f:
        _t = int(time.time())
        sig = "sc%7*g{}@!$%".format(_t)
        md = hashlib.md5()
        md.update(sig.encode(encoding='utf-8'))
        sign = md.hexdigest()
        # 2
        post_dict = {
            'time': _t,
            'sig': sign,
            'name': item['videoName'],
            'area': 'us',
            'cate': 'Beauty',
            'year': 2019,
            'director': '',
            'actor': '',
            'type': 'movie',
            'total': 1,
            'cover_url': item['name'] + '.jpg',
            'grade': 2.0,
            'mins': sec,
            'source_url': item['name'] + '.mp4',
            'resolution': item['resolution'],
            'part': 1,
            'intro': ''
        }
        json.dump(post_dict, f)

    spider.all_data.append(item['name'])
    write_data.open_data('dataList', item['name'] + '\n')

def readExtractedfiles():
    # print getCodesInCSVsForAllDatasets(quandl_apikey)
    f_list = []
    folderconvey = getCodesInCSVsForAllDatasets(quandl_apikey)
    for key in folderconvey.keys():
        f_list.append(key)
    # print f_list
    q_data_base_URL = "https://www.quandl.com/api/v3/datasets/{0}"
    filenamesList = []
    for (dirpath, dirnames, filenames) in walk(DEFAULT_DATA_PATH):
        filenamesList.extend(filenames)
    try:
        for fn in filenamesList:
            print fn
            try:
                dataset_qcodes = []
                logging.info(fn + " extracted.")
                codesFile = os.path.abspath(os.path.join(DEFAULT_DATA_PATH, fn))
                with open(codesFile, 'r') as csv_file:
                    csvlines = csv_file.readlines()
                    for num, line in enumerate(csvlines[:5]):
                        codeline = line.split(',')
                        if len(codeline) > 1:
                            dataset_code = codeline[0]
                            dataset_descrpn = codeline[1]
                            download_url = q_data_base_URL.format(dataset_code)
                            data_URL = download_url + "?api_key=" + quandl_apikey
                            time.sleep(1)
                            resp = os.popen("curl " + data_URL)
                            resp_data = resp.read()
                            json_data = json.loads(resp_data)
                            # folderconvey = getCodesInCSVsForAllDatasets(quandl_apikey)
                            foldername = json_data["dataset"]["name"]
                            dat_code = json_data["dataset"]["database_code"]
                            # foldername = (foldername.replace('-', '').replace(' ', '_')).lower()
                            foldername = re.sub("[^A-Za-z0-9 ]+", "", foldername)
                            foldername = re.sub(" +", " ", foldername).replace(" ", "_").lower()
                            print ">>>>>>>" + foldername
                            for name in f_list:
                                if name == dat_code:
                                    out_fldr_name = folderconvey[name]
                                    out_fldr_name = re.sub("[^A-Za-z0-9 ]+", "", out_fldr_name)
                                    out_fldr_name = re.sub(" +", " ", out_fldr_name).replace(" ", "_").lower()
                                    try:
                                        os.chdir(rootfolder)
                                        if not os.path.isdir(out_fldr_name):
                                            os.mkdir(out_fldr_name)
                                        os.chdir(out_fldr_name)
                                        if not os.path.isdir(foldername):
                                            os.mkdir(foldername)
                                        os.chdir(foldername)
                                    except WindowsError:
                                        continue
                                    fileformat = ".csv"
                                    if not os.path.isfile(dataset_code.split('/')[1] + '-datasets-codes' + fileformat):
                                        urll = download_url + "/data.csv"
                                        downloader = Downloader(urll, dataset_code.split('/')[1] + fileformat, 8)
                                        downloader.start()
                                        downloader.wait_for_finish()
            except:
                raise
                continue
    except:
        pass

def start_update():
    '''Withdraw main window and start update download'''
    nonlocal button_frame
    nonlocal cancel_button
    nonlocal update_button
    nonlocal debug
    nonlocal sv_version

    master.withdraw()

    try:
        os.remove('update.zip')
    except OSError:
        pass

    install = True

    # For development purposes
    if not bundle and debug:
        if not msgbox.askyesno('', 'Install update?'):
            install = False

    cancel_button.destroy()
    update_button.destroy()

    def cancel():
        if msgbox.askokcancel(_('Cancel'), _('Are you sure to cancel?')):
            os._exit(0)

    # There's no cancel button so we use close button as one instead
    updatewindow.protocol("WM_DELETE_WINDOW", cancel)

    # Define progress variables
    dl_p = tk.IntVar()
    dl_p.set(0)
    dl_pbar = ttk.Progressbar(button_frame, length=150, orient=tk.HORIZONTAL,
                              variable=dl_p)
    dl_pbar.pack(side='left', padx=5)

    dl_prog_var = tk.StringVar()
    dl_prog_var.set('-------- / --------')
    dl_prog = tk.Label(button_frame, textvariable=dl_prog_var)

    dl_speed_var = tk.StringVar()
    dl_speed_var.set('---------')
    dl_speed = tk.Label(button_frame, textvariable=dl_speed_var)

    dl_prog.pack(side='right', padx=5)
    dl_speed.pack(side='right', padx=5)
    master.update()

    dl_url = f'https://github.com/sw2719/steam-account-switcher/releases/download/v{sv_version}/Steam_Account_Switcher_v{sv_version}.zip'

    try:
        r = req.get(dl_url, stream=True)
        r.raise_for_status()
        total_size = int(r.headers.get('content-length'))
        total_in_MB = round(total_size / 1048576, 1)
    except req.RequestException:
        msgbox.showerror(_('Error'),
                         _('Error occured while downloading update.'))
        os._exit(1)

    if round(total_in_MB, 1).is_integer():
        total_in_MB = int(total_in_MB)

    def launch_updater():
        if not install:
            return

        while not os.path.isfile('update.zip'):
            sleep(1)

        try:
            archive = os.path.join(os.getcwd(), 'update.zip')
            f = zf.ZipFile(archive, mode='r')
            f.extractall(members=(member for member in f.namelist()
                                  if 'updater' in member))
            os.execv('updater/updater.exe', sys.argv)
        except (FileNotFoundError, zf.BadZipfile, OSError):
            error_msg(_('Error'),
                      _("Couldn't perform automatic update.") + '\n' +
                      _('Update manually by extracting update.zip file.'))

    def dl_callback(downloader):
        nonlocal total_in_MB

        current_size = downloader.total_downloaded
        current_in_MB = round(current_size / 1048576, 1)

        if round(current_in_MB, 1).is_integer():
            current_in_MB = int(current_in_MB)

        perc = int(current_size / total_size * 100)
        prog = f'{current_in_MB}MB / {total_in_MB}MB'

        dl_p.set(perc)
        dl_prog_var.set(prog)
        dl_speed_var.set(downloader.readable_speed + '/s')
        master.update()

        if perc == 100 and downloader.total_merged == downloader.total_length:
            launch_updater()

    downloader = Downloader(dl_url, 'update.zip', 8)
    downloader.subscribe(dl_callback)
    downloader.start()

def LLloop():
    LLqueue = open(os.path.dirname(__file__) + '/cgi-bin/LLprogress', "r+",
                   encoding='utf-8')
    lines = LLqueue.read()
    item = lines.split('\n', 1)[0]
    if len(item) > 10:
        LL_item = json.loads(item)
        url = LL_item[0]
        filename = 'files' + LL_item[1] + '/' + os.path.basename(LL_item[0])
        chunk = int(LL_item[2])
        type = int(LL_item[3])
        print('Start Downloading [' + url + ']')
        if type == 1:
            downloader = Downloader(url, filename, chunk)
            downloader.start()
            downloader.wait_for_finish()
        if type == 2:
            LLcurl = 'curl -k ' + url + ' --output ' + filename
            os.system(LLcurl)
        datastring = '["' + url + '"' + ',' + '"' + filename + '"' + ',' + '"' + \
            LL_item[2] + '"' + ',' + '"' + LL_item[3] + '"' + "]\n"
        if os.path.isfile(filename):
            LLqueue = open(os.path.dirname(__file__) + '/cgi-bin/LLresult', "a",
                           encoding='utf-8')
            LLqueue.write(datastring)
            LLqueue.close()
        else:
            LLqueue = open(os.path.dirname(__file__) + '/cgi-bin/LLerrors', "a",
                           encoding='utf-8')
            LLqueue.write(datastring)
            LLqueue.close()
        print('Checking for next ...')
    else:
        print('Waiting mode ...')
        LLqueue = open(os.path.dirname(__file__) + '/cgi-bin/LLqueue', "r+",
                       encoding='utf-8')
        lines = LLqueue.read()
        new_item = lines.split('\n', 1)[0]
        if len(new_item) > 10:
            LLprogress = open(os.path.dirname(__file__) + '/cgi-bin/LLprogress',
                              "w", encoding='utf-8')
            LLprogress.write(new_item + "\n")
            LLprogress.close()
            LLqueue.close()
            LLqueue_old = open(os.path.dirname(__file__) + '/cgi-bin/LLqueue',
                               "r+", encoding='utf-8')
            LLqueue_old.readline()
            new_LLqueue = open(os.path.dirname(__file__) + '/cgi-bin/LLqueue',
                               "w", encoding='utf-8')
            shutil.copyfileobj(LLqueue_old, new_LLqueue)
        else:
            LLprogress = open(os.path.dirname(__file__) + '/cgi-bin/LLprogress',
                              "w", encoding='utf-8')
            LLprogress.write('' + "\n")
            LLprogress.close()
            print('No new Item !')
    time.sleep(1)

name = n.text
if name and original_url:
    fileformat = ".csv"
    filename = re.sub("[^A-Za-z0-9 ]", "", name).lower().replace(" ", "_") + fileformat
    if original_url.split(".")[-1] == 'csv':
        os.chdir(rootfolder)
        if not os.path.isdir(filename):
            os.mkdir(filename)
        os.chdir(filename)
        if not os.path.isfile(filename):
            downloader = Downloader(original_url, filename, 8)
            downloader.start()
            print "came here"
            print "downloading file " + filename
            downloader.wait_for_finish()
    '''if original_url.split(".")[-1] == 'zip':
        #url = urllib.urlopen(original_url)
        #zip_file = ZipFile(StringIO(url.read()))
        #files = zipfile.namelist()
        #fopen = open(filename+'.csv', 'w')
        #zipcontent = url.read()
        downloader = Downloader(original_url, filename+".zip", 8)
        downloader.start()
        downloader.wait_for_finish()
        print "comitted here"
    '''

def extractFromJSON(domain, datasets_colln):
    datasets_cursor = datasets_colln.find()
    print(datasets_cursor)
    # files_download = 10
    for dataset in datasets_cursor:
        """if files_download == 0:
            break
        files_download = files_download - 1"""
        dataset_name = dataset["name"]
        print(">>>> " + dataset_name)
        available_formats = {}
        res_format = None
        res_urls = {}
        other_formats = {}
        for i, res in enumerate(dataset["resources"]):
            available_formats[i] = {
                "format": res["format"],
                "url": res["url"],
                "filename": res["id"]
            }
        # print(available_formats)
        for a in available_formats.values():
            if 'JSONL' in a["format"]:
                res_format = 'JSONL'
                break
            elif 'jsonl' in a["format"]:
                res_format = 'jsonl'
                break
            elif 'CSV' in a["format"]:
                res_format = 'CSV'
                break
            elif 'csv' in a["format"]:
                res_format = 'csv'
                break
            elif 'JSON' in a["format"]:
                res_format = 'JSON'
                break
            elif 'json' in a["format"]:
                res_format = 'json'
                break
            """elif 'XLS' in a["format"]:
                res_format = 'XLS'
                break
            elif 'xls' in a["format"]:
                res_format = 'xls'
                break
            else:
                other_formats[a["url"]] = a["filename"]
                with open("fileformats.txt", "a+") as fileformats:
                    fileformats.write(a["filename"] + ',' + a["format"] + ',' \
                        + a["url"] + '\n')"""
        if res_format is None:
            continue
            # res_urls = other_formats
        else:
            # continue
            for a in available_formats.values():
                if res_format in a["format"]:
                    res_urls[a["url"]] = a["filename"]
        os.chdir(root_folder)
        if not os.path.isdir(dataset_name):
            os.mkdir(dataset_name)
        os.chdir(dataset_name)
        print(str(res_format) + " :: " + str(len(res_urls)))
        for res_url in res_urls.keys():
            if res_format is None:
                file_name = res_urls[res_url]
            else:
                file_name = res_urls[res_url] + "." + (res_format).lower()
            print("Downloading... " + file_name)
            # print("... from >> " + res_url)
            try:
                if not os.path.isfile(file_name):
                    sleep(1)
                    downloader = Downloader(res_url, file_name, 8)
                    downloader.start()
                    downloader.wait_for_finish()
                    """resp = urllib.request.urlopen(res_url)
                    resp_content = resp.read()
                    print("Writing...")
                    with open(file_name, 'wb') as res_file:
                        res_file.write(resp_content)"""
            except:
                print("Error @ " + dataset_name)
                continue