def download(filename=JSONCACHE, metadata=None):
    """ Download the file. """
    if not metadata:
        metadata = get_metadata()
        if not metadata:
            # Bail, since we get the URI from the metadata.
            return False
    req = urllib.request.Request(metadata["download_uri"])
    try:
        with send_req(req) as response:
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename + ".tmp", 'wb') as f:
                block_size = 8192
                pbar = progressbar.DataTransferBar().start()
                copied = 0
                while True:
                    chunk = response.read(block_size)
                    if not chunk:
                        break
                    f.write(chunk)
                    # Count actual bytes rather than whole blocks, so the
                    # final (possibly short) chunk is not over-reported.
                    copied += len(chunk)
                    pbar.update(copied)
                pbar.finish()
        os.rename(filename + ".tmp", filename)
        save_metadata(metadata)
        return True
    except urllib.error.URLError as e:
        _logger.error("Error retrieving JSON file: {}".format(e))
        return False
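# A minimal standalone sketch of the chunked-download pattern used above,
# added for illustration; fetch() and its arguments are assumptions, not part
# of the original module. Uses only the stdlib plus progressbar2.
import urllib.request
import progressbar

def fetch(url, dest, block_size=8192):
    with urllib.request.urlopen(url) as response, open(dest, 'wb') as f:
        bar = progressbar.DataTransferBar().start()
        copied = 0
        while True:
            chunk = response.read(block_size)
            if not chunk:
                break
            f.write(chunk)
            copied += len(chunk)
            bar.update(copied)  # actual bytes written so far
        bar.finish()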
def call_and_put(tmp_fn):
    # In order to avoid race conditions we move the mtime
    # of the temporary file into the past.
    the_past = int(time.time() - 2)
    os.utime(tmp_fn, times=(the_past, the_past))
    cb(tmp_fn)
    stat_after = os.stat(tmp_fn)
    if stat_after.st_mtime > the_past:
        if backup is not None:
            self._dbfs.mv(src, src + backup, overwrite=True)
        with progressbar.DataTransferBar() as bar:
            def update_cb(size, bytes_copied):
                bar.max_value = size
                bar.update(bytes_copied)
            self._dbfs.put(tmp_fn, src, overwrite=True, update_cb=update_cb)
    else:
        raise Exception("File was not modified!")
def do_get(self, overwrite, src, target):
    """
    get [OPTS] src [target]

    Copies the given remote file to the local system.

    Supported options are as follows:

      -o, --overwrite  When a file already exists at the target
                       location, it is overwritten.
    """
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    parent_dir = os.path.dirname(target)
    fastdbfs.util.mkdirs(parent_dir)
    with progressbar.DataTransferBar() as bar:
        def update_cb(size, bytes_copied):
            bar.max_value = size
            bar.update(bytes_copied)
        self._dbfs.get(src, target, overwrite=overwrite, update_cb=update_cb)
def do_put(self, overwrite, src, target):
    """
    put [OPTS] src [target]

    Copies the given local file to the remote system.

    Supported options are:

      -o, --overwrite  When a file already exists at the target
                       location, it is overwritten.
    """
    try:
        if self._dbfs.filetest_d(target):
            target = os.path.join(target, posixpath.basename(src))
    except Exception:
        # If the remote directory test fails, fall through and treat
        # `target` as a plain file path.
        pass
    with progressbar.DataTransferBar() as bar:
        def update_cb(size, bytes_copied):
            bar.max_value = size
            bar.update(bytes_copied)
        self._dbfs.put(src, target, overwrite=overwrite, update_cb=update_cb)
def show_progress(count, block_size, total_size):
    # Reporthook-style callback (block_count, block_size, total_size), as
    # used by urllib.request.urlretrieve; `progress` is a module-level dict
    # shared across calls.
    if progress['bar'] is None:
        progress['bar'] = progressbar.DataTransferBar(max_value=total_size)
    progress['downloaded'] = count * block_size
    if progress['downloaded'] >= total_size:
        progress['bar'].finish()
        progress['bar'] = None
        progress['downloaded'] = 0
    else:
        progress['bar'].update(progress['downloaded'])
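# Hedged usage sketch: show_progress matches the reporthook signature of
# urllib.request.urlretrieve. The initial `progress` dict and the URL below
# are assumptions for illustration.
import urllib.request

progress = {'bar': None, 'downloaded': 0}
urllib.request.urlretrieve('https://example.com/data.bin', 'data.bin',
                           reporthook=show_progress)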
import progressbar

def initialize_progress_bar(max_value):
    if max_value is None:
        max_value = progressbar.UnknownLength
    widgets = [
        progressbar.Percentage(),
        ' (', progressbar.DataSize(), ' of ',
        progressbar.DataSize('max_value'), ') ',
        progressbar.AdaptiveTransferSpeed(),
        progressbar.Bar(marker='█'),
        progressbar.Timer(), ' ',
        progressbar.AdaptiveETA()
    ]
    return progressbar.DataTransferBar(max_value=max_value, widgets=widgets)
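# Usage sketch for the widget-based bar above; the 1 MiB size and 64 KiB
# step are illustrative assumptions.
bar = initialize_progress_bar(1024 * 1024)
for done in range(0, 1024 * 1024 + 1, 64 * 1024):
    bar.update(done)
bar.finish()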
def download_stream(yt: YouTube, stream: Stream):
    with progressbar.DataTransferBar(max_value=stream.filesize) as bar:
        def on_progress(stream, chunk: bytes, bytes_remaining: int):
            # bar.update(len(chunk))
            bar.update(stream.filesize - bytes_remaining)

        def on_complete(stream, file_path: str):
            bar.finish()
            print(f'Downloaded at {file_path}')

        yt.register_on_progress_callback(on_progress)
        yt.register_on_complete_callback(on_complete)
        stream.download()
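# Hedged usage sketch, assuming pytube is installed; the URL is a
# placeholder. get_highest_resolution() picks a progressive MP4 stream.
from pytube import YouTube

yt = YouTube('https://www.youtube.com/watch?v=<VIDEO_ID>')
stream = yt.streams.get_highest_resolution()
download_stream(yt, stream)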
def _get_to_temp(self, src, suffix=None, **mkstemp_args):
    if suffix is None:
        bn = posixpath.basename(src)
        try:
            suffix = bn[bn.rindex("."):]
        except ValueError:
            # No extension in the remote file name.
            suffix = ""
    with progressbar.DataTransferBar() as bar:
        def update_cb(size, bytes_copied):
            bar.max_value = size
            bar.update(bytes_copied)
        # Pass the callback through so the bar actually updates, assuming
        # get_to_temp forwards update_cb the way get()/put() do above.
        return self._dbfs.get_to_temp(src, suffix=suffix,
                                      update_cb=update_cb, **mkstemp_args)
def read_from_socket(sock, file, filesize):
    logger.debug('Receiving file...')
    with progressbar.DataTransferBar(min_value=0, max_value=filesize) as bar:
        read_size = 0
        data = sock.recv(CHUNK_SIZE)
        if not data:
            raise RuntimeError('No data from server')
        while data:
            file.write(data)
            # Count the bytes actually received; recv() may return fewer
            # than CHUNK_SIZE bytes per call.
            read_size += len(data)
            bar.update(min(read_size, filesize))
            if read_size >= filesize:
                # All expected bytes received; don't block on another recv.
                break
            data = sock.recv(CHUNK_SIZE)
    logger.debug('Finished transfer')
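# Hedged usage sketch: receive a file of known size over a connected socket.
# The host, port and 8-byte big-endian length prefix are assumptions; the
# function above only needs CHUNK_SIZE and an open socket.
import socket

CHUNK_SIZE = 64 * 1024
with socket.create_connection(('127.0.0.1', 9000)) as sock, \
        open('received.bin', 'wb') as f:
    filesize = int.from_bytes(sock.recv(8), 'big')  # assumed size header
    read_from_socket(sock, f, filesize)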
def crc(path):
    log.debug('Calculating CRC32 value')
    checksum = 0  # crc32 here is zlib.crc32 (binascii.crc32 also works)
    fsize = os.path.getsize(path)
    done = 0
    chunk_size = 4 * 2**20  # 4 MiB chunks
    with open(path, 'rb') as f:
        with progressbar.DataTransferBar(max_value=fsize,
                                         max_error=False) as bar:
            while True:
                data = f.read(chunk_size)
                if not data:
                    return checksum & 0xFFFFFFFF
                # Advance by the bytes actually read, so the final short
                # chunk cannot push the bar past max_value.
                done += len(data)
                bar.update(done)
                checksum = crc32(data, checksum)
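# Usage sketch: print the checksum in the conventional 8-hex-digit form.
# The path is a placeholder assumption.
value = crc('/path/to/archive.zip')
print(f'CRC32: {value:08X}')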
def combine_particle_subsamples(download_path, extracted_files, size_bytes,
                                output_name=None):
    """cat files in 'extracted_files' together into a new file, then delete
    extracted_files."""
    output_file = download_path.with_name(output_name)
    expected_output_size = np.sum(size_bytes)

    ## check whether output_file already exists *and* is complete
    if os.path.exists(output_file) and os.path.getsize(
            output_file) == expected_output_size:
        print("\tSkipping files, already combined.")
        return

    ## combine files
    print("\tcombining extracted files...")
    print(f"\t\toutput to: {output_file}")
    bar = progressbar.DataTransferBar(max_value=expected_output_size)
    with open(output_file, 'wb') as output_fp:
        bar.update(0)
        for i, filename in enumerate(extracted_files):
            input_file = download_path.with_name(str(filename))
            with open(input_file, 'rb') as input_fp:
                shutil.copyfileobj(input_fp, output_fp)
            # Report bytes written including the file just copied (the
            # original summed only the preceding files, lagging by one).
            bar.update(np.sum(size_bytes[:i + 1]))
    bar.finish()

    ## delete extracted_files
    for filename in extracted_files:
        input_file = download_path.with_name(str(filename))
        input_file.unlink()
def _process_esc50(esc_50_path, save_path):
    """
    Processes the 2000 5-sec clips of the ESC-50 dataset and dumps a pickle
    with the metadata for each audio file. The sample rate is hard-coded to
    22050. Taken with permission from
    'https://github.com/karoldvl/paper-2015-esc-convnet' with minor
    adaptations.

    Args:
        esc_50_path (str): path to the base folder containing the
            class-specific subfolders.
        save_path (str): folder in which the esc50_audio.dat and the
            esc50_meta.pkl files will be saved.
    """
    rows_meta = []
    rows_audio = []
    category_counter = 0
    for directory in sorted(os.listdir(esc_50_path)):
        directory = os.path.join(esc_50_path, directory)
        if not (os.path.isdir(directory)
                and os.path.basename(directory)[0:3].isdigit()):
            continue
        print('Parsing ' + directory)
        bar = progressbar.DataTransferBar(
            max_value=len(os.listdir(directory)))
        for i, clip in enumerate(sorted(os.listdir(directory))):
            if clip[-3:] != 'ogg':
                continue
            filepath = '{0}/{1}'.format(directory, clip)
            filename = os.path.basename(filepath)
            fold = filename[0]
            category = category_counter
            category_name = os.path.dirname(filepath).split('/')[-1]
            rows_meta.append(
                pd.DataFrame(
                    {
                        'filename': filename,
                        'fold': fold,
                        'category': category,
                        'category_name': category_name
                    },
                    index=[0]))
            rows_audio.append(
                load_audio(filepath, 5000, framerate=22050, channel_nr=1))
            bar.update(i)
        bar.finish()
        # libc.malloc_trim(0)
        # Consolidate the accumulated rows after each category folder to
        # keep memory usage down.
        rows_meta = [pd.concat(rows_meta, ignore_index=True)]
        rows_audio = [np.vstack(rows_audio)]
        category_counter = category_counter + 1
    rows_meta = rows_meta[0]
    rows_meta[['category', 'fold']] = rows_meta[['category',
                                                 'fold']].astype(int)
    rows_meta.to_pickle(os.path.join(save_path, 'esc50_meta.pkl'))
    mm = np.memmap(os.path.join(save_path, 'esc50_audio.dat'),
                   dtype='float32', mode='w+', shape=(2000, 110250))
    mm[:] = rows_audio[0][:]
    mm.flush()
    del rows_audio
    print('processed and saved')
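# Hypothetical invocation; both paths are assumptions. The function expects
# class subfolders whose names start with three digits under esc_50_path,
# and writes esc50_meta.pkl and esc50_audio.dat into save_path.
_process_esc50('/data/ESC-50-master', '/data/esc50-processed')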
if args.recursive:
    paths = [
        p for path in args.inputs for p in path.glob("**/*.tar.gz")
        if p.is_file()
    ]
else:
    paths = args.inputs

print("Calculating total file size...")
total_size = sum([
    member.size for path in progressbar.progressbar(paths)
    for member in tarfile.open(path).getmembers() if member.isfile()
])
total_success = 0
total_failed = 0
with progressbar.DataTransferBar(max_value=total_size) as progress:
    for path in paths:
        print(f"Importing dataset {path}")
        tf = tarfile.open(path)
        for m in tf.getmembers():
            if m.isfile():
                print(f"- Importing member file {m.name}...")
                logfile = f"{path}/{m.name}"
                mf = tf.extractfile(m)

                def generate_actions(f, progress):
                    for line in f:
                        source = json.loads(line)
                        source["log"] = {"file": {"name": logfile}}
                        source.setdefault("winlog", dict())
                        # Plain data created by nxlog is completely moved
                        # to winlog.event_data
def download_file(file, download_root):
    """Download a Box file, saving it with its 'intrinsic' path inside
    download_root.

    Arguments:
        file {File} -- the Box file to download.
        download_root {Path} -- local directory under which the file's
            remote path is recreated.
    """
    ## determine remote url
    data_product_url = file.get_download_url()

    ## determine local download location
    download_path = download_root / PurePosixPath(
        get_path(file, truncate_prefix=1))
    print(f"download to: {download_path}")

    if not os.path.exists(download_path.parent):
        os.makedirs(download_path.parent)

    ## check if file is already completely downloaded:
    c = pycurl.Curl()
    c.setopt(c.URL, data_product_url)
    c.setopt(c.NOBODY, 1)  # get headers only
    c.perform()
    remote_filesize = c.getinfo(c.CONTENT_LENGTH_DOWNLOAD)

    if (os.path.exists(download_path)
            and os.path.getsize(download_path) == remote_filesize):
        print("\tSkipping file, already downloaded.")
    else:
        curl = pycurl.Curl()
        curl.setopt(pycurl.URL, data_product_url)
        curl.setopt(pycurl.FOLLOWLOCATION, 1)
        curl.setopt(pycurl.MAXREDIRS, 5)
        curl.setopt(pycurl.CONNECTTIMEOUT, 30)
        if os.path.exists(download_path):
            # Resume a partial download from where it left off.
            fp = open(download_path, 'ab')
            curl.setopt(pycurl.RESUME_FROM, os.path.getsize(download_path))
        else:
            fp = open(download_path, 'wb')
        bar = progressbar.DataTransferBar(max_value=remote_filesize)
        initial_size = os.path.getsize(download_path)

        def progress(total, existing, upload_t, upload_d):
            downloaded = existing + initial_size
            bar.update(downloaded)

        curl.setopt(pycurl.NOPROGRESS, 0)
        curl.setopt(pycurl.PROGRESSFUNCTION, progress)
        curl.setopt(pycurl.WRITEDATA, fp)
        print("\tdownloading...")
        curl.perform()
        curl.close()
        fp.close()
        bar.finish()
        print()
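# Hedged usage sketch, assuming the boxsdk client used elsewhere in the
# project; `client`, the file id, and the local root are all placeholder
# assumptions.
from pathlib import Path

file = client.file(file_id='123456789').get()  # `client` assumed configured
download_file(file, Path('/data/box-mirror'))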