Beispiel #1
0
def download(filename=JSONCACHE, metadata=None):
    """Download the JSON file described by *metadata* to *filename*.

    Args:
        filename: local destination path (defaults to JSONCACHE).
        metadata: metadata dict containing "download_uri"; fetched via
            get_metadata() when not supplied.

    Returns:
        True on success, False when metadata is unavailable or the
        request fails.
    """
    if not metadata:
        metadata = get_metadata()
        if not metadata:
            # Bail, since we get the URI from the metadata.
            return False

    req = urllib.request.Request(metadata["download_uri"])
    tmp_name = filename + ".tmp"
    try:
        with send_req(req) as response:
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            pbar = progressbar.DataTransferBar().start()
            bytes_read = 0
            with open(tmp_name, 'wb') as f:
                block_size = 8192
                while True:
                    chunk = response.read(block_size)
                    if not chunk:
                        break
                    f.write(chunk)
                    # Count actual bytes: the final chunk is usually
                    # shorter than block_size, so count * block_size
                    # would overstate progress.
                    bytes_read += len(chunk)
                    pbar.update(value=bytes_read)
            pbar.finish()
            # Rename only after the temp file is closed: renaming an
            # open file fails on Windows, and closing guarantees the
            # data is flushed before we commit it.
            os.rename(tmp_name, filename)
            save_metadata(metadata)
            return True
    except urllib.error.URLError as e:
        _logger.error("Error retrieving JSON file: {}".format(e))
        return False
Beispiel #2
0
        def call_and_put(tmp_fn):
            """Invoke *cb* on tmp_fn, then upload the file back if modified.

            The temp file's mtime is first moved 2 seconds into the past so
            that a modification made by cb is detectable even on filesystems
            with coarse timestamp resolution (avoids a race with the write).

            Raises:
                Exception: if cb did not modify the file.
            """
            the_past = int(time.time() - 2)
            os.utime(tmp_fn, times=(the_past, the_past))

            cb(tmp_fn)

            stat_after = os.stat(tmp_fn)
            if stat_after.st_mtime > the_past:

                # Optionally keep a backup of the remote file before
                # overwriting it.
                if backup is not None:
                    self._dbfs.mv(src, src + backup, overwrite=True)

                with progressbar.DataTransferBar() as bar:

                    def update_cb(size, bytes_copied):
                        bar.max_value = size
                        bar.update(bytes_copied)

                    self._dbfs.put(tmp_fn,
                                   src,
                                   overwrite=True,
                                   update_cb=update_cb)

            else:
                # Plain string: the original used an f-string with no
                # placeholders (lint F541); the message is unchanged.
                raise Exception("File was not modified!")
Beispiel #3
0
    def do_get(self, overwrite, src, target):
        """
        get [OPTS] src [target]

        Copies the given remote file to the local system.

        Supported options are as follows:

          -o, --overwrite  When a file already exists at the target
                           location, it is overwritten.
        """
        # A directory target means "keep the remote basename".
        if os.path.isdir(target):
            target = os.path.join(target, os.path.basename(src))

        # Make sure the local parent directory exists before downloading.
        fastdbfs.util.mkdirs(os.path.dirname(target))

        with progressbar.DataTransferBar() as bar:

            def report_progress(total, copied):
                bar.max_value = total
                bar.update(copied)

            self._dbfs.get(src, target,
                           overwrite=overwrite,
                           update_cb=report_progress)
Beispiel #4
0
    def do_put(self, overwrite, src, target):
        """
        put [OPTS] src [target]

        Copies the given local file to the remote system.

        Supported options are:

          -o, --overwrite  When a file already exists at the target
                           location, it is overwritten.
        """

        # Best effort: when the remote target is a directory, append the
        # source basename.  A failed test (e.g. target missing) is fine;
        # the subsequent put surfaces any real error.
        try:
            if self._dbfs.filetest_d(target):
                target = os.path.join(target, posixpath.basename(src))
        except Exception:
            # Narrowed from a bare "except:", which would also swallow
            # KeyboardInterrupt/SystemExit.
            pass

        with progressbar.DataTransferBar() as bar:

            def update_cb(size, bytes_copied):
                bar.max_value = size
                bar.update(bytes_copied)

            self._dbfs.put(src,
                           target,
                           overwrite=overwrite,
                           update_cb=update_cb)
Beispiel #5
0
    def show_progress(count, block_size, total_size):
        """urlretrieve-style reporthook driving the shared progress bar.

        Lazily creates the bar on first call and tears it down (resetting
        the shared `progress` state) once the download completes.
        """
        bar = progress['bar']
        if bar is None:
            bar = progressbar.DataTransferBar(max_value=total_size)
            progress['bar'] = bar

        downloaded = count * block_size
        progress['downloaded'] = downloaded
        if downloaded < total_size:
            bar.update(downloaded)
        else:
            # Transfer complete: close the bar and reset shared state so a
            # subsequent download starts fresh.
            bar.finish()
            progress['bar'] = None
            progress['downloaded'] = 0
Beispiel #6
0
def initializee_progress_bar(max_value):
    """Build a DataTransferBar with percentage/size/speed/ETA widgets.

    Args:
        max_value: total transfer size in bytes, or None when unknown.

    Returns:
        A configured progressbar.DataTransferBar instance.
    """
    # NOTE(review): the function name has a typo ("initializee") — kept
    # as-is for caller compatibility.
    widgets = [
        progressbar.Percentage(),
        ' (',
        progressbar.DataSize(),
        ' of',
        progressbar.DataSize('max_value'),
        ' )',
        progressbar.AdaptiveTransferSpeed(),
        progressbar.Bar(marker='█'),
        progressbar.Timer(),
        ' ',
        progressbar.AdaptiveETA(),
    ]
    size = progressbar.UnknownLength if max_value is None else max_value
    return progressbar.DataTransferBar(max_value=size, widgets=widgets)
Beispiel #7
0
def download_stream(yt: YouTube, stream: Stream):
    """Download *stream* while showing a transfer bar fed by pytube callbacks."""
    total = stream.filesize
    with progressbar.DataTransferBar(max_value=total) as bar:

        def _on_progress(stream, chunk: bytes, bytes_remaining: int):
            # pytube reports bytes remaining; the bar wants bytes completed.
            bar.update(total - bytes_remaining)

        def _on_complete(stream, file_path: str):
            bar.finish()
            print(f'Downloaded at {file_path}')

        yt.register_on_progress_callback(_on_progress)
        yt.register_on_complete_callback(_on_complete)
        stream.download()
Beispiel #8
0
    def _get_to_temp(self, src, suffix=None, **mkstemp_args):
        """Download *src* to a temporary file, reporting transfer progress.

        Args:
            src: remote path to fetch.
            suffix: temp-file suffix; defaults to src's extension
                (including the dot), or "" when src has no extension.
            **mkstemp_args: forwarded to the underlying mkstemp call.

        Returns:
            Whatever self._dbfs.get_to_temp returns.
        """
        if suffix is None:
            bn = posixpath.basename(src)
            try:
                suffix = bn[bn.rindex("."):]
            except ValueError:
                # No "." in the basename: nothing to preserve.
                suffix = ""

        with progressbar.DataTransferBar() as bar:

            def update_cb(size, bytes_copied):
                bar.max_value = size
                bar.update(bytes_copied)

            # BUG FIX: update_cb was defined but never passed through, so
            # the bar never advanced.  Wire it up the same way the sibling
            # get/put wrappers do.
            return self._dbfs.get_to_temp(src,
                                          suffix=suffix,
                                          update_cb=update_cb,
                                          **mkstemp_args)
Beispiel #9
0
def read_from_socket(sock, file, filesize):
    """Receive a file of *filesize* bytes from *sock*, writing it to *file*.

    Args:
        sock: connected socket to read from.
        file: binary-writable file object.
        filesize: expected total size in bytes (drives the progress bar).

    Raises:
        RuntimeError: when the server sends no data at all.
    """
    logger.debug('Receiving file...')
    with progressbar.DataTransferBar(min_value=0, max_value=filesize) as bar:
        data = sock.recv(CHUNK_SIZE)
        if not data:
            raise RuntimeError('No data from server')
        received = 0
        while data:
            file.write(data)
            # recv() may return fewer than CHUNK_SIZE bytes, so count the
            # bytes actually received instead of assuming full chunks;
            # clamp in case the peer sends more than advertised.
            received += len(data)
            bar.update(min(received, filesize))
            data = sock.recv(CHUNK_SIZE)
    logger.debug('Finished transfer')
Beispiel #10
0
def crc(path):
    """Compute the CRC32 of the file at *path* with a progress bar.

    Args:
        path: path of the file to checksum.

    Returns:
        The CRC32 checksum as an unsigned 32-bit integer.
    """
    log.debug('Calculating CRC32 value')
    checksum = 0
    fsize = os.path.getsize(path)
    chunk_size = 4 * 2**20  # read in 4 MiB chunks
    with open(path, 'rb') as f:
        with progressbar.DataTransferBar(max_value=fsize,
                                         max_error=False) as bar:
            bytes_read = 0
            while True:
                data = f.read(chunk_size)
                if not data:
                    return checksum & 0xFFFFFFFF

                # Count actual bytes read: the final (usually partial)
                # chunk would otherwise push the bar past max_value.
                bytes_read += len(data)
                bar.update(bytes_read)
                checksum = crc32(data, checksum)
Beispiel #11
0
def combine_particle_subsamples(download_path,
                                extracted_files,
                                size_bytes,
                                output_name=None):
    """cat files in 'extracted_files' together into new file, then delete extracted_files.

    Args:
        download_path: pathlib.Path used as the base for sibling file names.
        extracted_files: iterable of file names (siblings of download_path).
        size_bytes: per-file sizes in bytes, aligned with extracted_files.
        output_name: name of the combined output file.
    """

    output_file = download_path.with_name(output_name)
    expected_output_size = np.sum(size_bytes)

    ## check whether output_file already exists *and* is complete

    if os.path.exists(output_file) and os.path.getsize(
            output_file) == expected_output_size:
        print("\tSkipping files, already combined.")
        return

    ## combine files

    print("\tcombining extracted files...")
    print(f"\t\toutput to: {output_file}")
    bar = progressbar.DataTransferBar(max_value=expected_output_size)

    with open(output_file, 'wb') as output_fp:

        bar.update(0)

        for i, filename in enumerate(extracted_files):

            input_file = download_path.with_name(str(filename))

            with open(input_file, 'rb') as input_fp:
                shutil.copyfileobj(input_fp, output_fp)

            # BUG FIX: include the file just copied (i + 1, not i) so the
            # bar tracks bytes actually written and can reach 100%.
            bar.update(np.sum(size_bytes[:i + 1]))

    bar.finish()

    ## delete extracted_files

    for filename in extracted_files:
        input_file = download_path.with_name(str(filename))
        input_file.unlink()
Beispiel #12
0
def _process_esc50(esc_50_path, save_path):
    """
    Processes the 2000 5-sec clips of the esc50 dataset and dumps a pickle with the metadata for each audio file.
    The sample rate is hard-coded to 22050.

    Taken with permission from 'https://github.com/karoldvl/paper-2015-esc-convnet' with minor adaptions.

    Args:
        esc_50_path (str): path to the base folder containing the class-specific subfolders.
        save_path (str): folder in which the esc50_audio.dat and the esc50_meta.pkl files will be saved.
    """

    rows_meta = []
    rows_audio = []
    category_counter = 0

    for directory in sorted(os.listdir(esc_50_path)):

        directory = os.path.join(esc_50_path, directory)

        # Only process class folders whose names start with a 3-digit id;
        # everything else in the base folder is skipped.
        if not (os.path.isdir(directory)
                and os.path.basename(directory)[0:3].isdigit()):
            continue
        print('Parsing ' + directory)

        # NOTE(review): max_value counts every directory entry but only
        # .ogg files advance the bar, so it may not reach 100%.
        bar = progressbar.DataTransferBar(max_value=len(os.listdir(directory)))
        for i, clip in enumerate(sorted(os.listdir(directory))):
            if clip[-3:] != 'ogg':
                continue
            filepath = '{0}/{1}'.format(directory, clip)
            filename = os.path.basename(filepath)
            # First character of the filename is taken as the fold id;
            # the class folder name doubles as the category name.
            fold = filename[0]
            category = category_counter
            category_name = os.path.dirname(filepath).split('/')[-1]
            rows_meta.append(
                pd.DataFrame(
                    {
                        'filename': filename,
                        'fold': fold,
                        'category': category,
                        'category_name': category_name
                    },
                    index=[0]))
            rows_audio.append(
                load_audio(filepath, 5000, framerate=22050, channel_nr=1))
            bar.update(i)
        bar.finish()
        # libc.malloc_trim(0)
        # Collapse the accumulated rows after each class folder: the single
        # combined element seeds the list for the next iteration, keeping
        # memory usage and concat cost low.
        rows_meta = [pd.concat(rows_meta, ignore_index=True)]
        rows_audio = [np.vstack(rows_audio)]
        category_counter = category_counter + 1

    rows_meta = rows_meta[0]
    rows_meta[['category', 'fold']] = rows_meta[['category',
                                                 'fold']].astype(int)

    rows_meta.to_pickle(os.path.join(save_path, 'esc50_meta.pkl'))
    # Memory-mapped output: 2000 clips x 110250 samples (5 s at 22050 Hz).
    mm = np.memmap(os.path.join(save_path, 'esc50_audio.dat'),
                   dtype='float32',
                   mode='w+',
                   shape=(2000, 110250))

    mm[:] = rows_audio[0][:]
    mm.flush()
    del rows_audio

    print('processed and saved')
Beispiel #13
0
                        else:
                            # Download with pycurl, following up to 5
                            # redirects, 30 s connect timeout.
                            curl = pycurl.Curl()
                            curl.setopt(pycurl.URL, data_product_url)
                            curl.setopt(pycurl.FOLLOWLOCATION, 1)
                            curl.setopt(pycurl.MAXREDIRS, 5)
                            curl.setopt(pycurl.CONNECTTIMEOUT, 30)

                            # Resume into an existing partial file when one
                            # is present; otherwise start a fresh download.
                            if os.path.exists(download_path):
                                fp = open(download_path, 'ab')
                                curl.setopt(pycurl.RESUME_FROM,
                                            os.path.getsize(download_path))

                            else:
                                fp = open(download_path, 'wb')

                            bar = progressbar.DataTransferBar(
                                max_value=remote_filesize)

                            # Bytes already on disk: the progress callback
                            # only reports bytes moved in this session.
                            initial_size = os.path.getsize(download_path)

                            def progress(total, existing, upload_t, upload_d):
                                downloaded = existing + initial_size
                                bar.update(downloaded)

                            curl.setopt(pycurl.NOPROGRESS, 0)
                            curl.setopt(pycurl.PROGRESSFUNCTION, progress)
                            curl.setopt(pycurl.WRITEDATA, fp)

                            print("\tdownloading...")
                            curl.perform()

                            # NOTE(review): fp is never closed here and
                            # perform() has no error handling — confirm the
                            # enclosing (not visible) code cleans up.
                            curl.close()
Beispiel #14
0
# Collect the input archives: either the given paths directly, or every
# *.tar.gz file found recursively below them.
if args.recursive:
    paths = [ p for path in args.inputs for p in path.glob("**/*.tar.gz") if p.is_file() ]
else:
    paths = args.inputs

print("Calulating total file size...")
# Sum the uncompressed sizes of all regular members across all archives so
# the transfer bar below has a meaningful max_value.
total_size = sum([
    member.size
    for path in progressbar.progressbar(paths)
    for member in tarfile.open(path).getmembers() if member.isfile()
    ])

total_success = 0
total_failed = 0

with progressbar.DataTransferBar(max_value=total_size) as progress:
    for path in paths:
        print(f"Importing dataset {path}")
        tf = tarfile.open(path)
        for m in tf.getmembers():
            if m.isfile():
                print(f"- Importing member file {m.name}...")
                logfile = f"{path}/{m.name}"
                mf = tf.extractfile(m)
                # Lazily turn each JSON line of the member file into an
                # action dict, tagging each with its source log file.
                def generate_actions(f, progress):
                    for line in f:
                        source = json.loads(line)
                        source["log"] = { "file": { "name": logfile }}
                        source.setdefault("winlog", dict())

                        # Plain data created by nxlog is completely moved to winlog.event_data except blacklisted
Beispiel #15
0
def download_file(file, download_root):
    """Download a Box file, saving it with its 'intrinsic' path inside download_root.

    Skips files that already exist locally at the remote size and resumes
    partial downloads.

    Arguments:
        file {File} -- the Box file to download.
        download_root {Path} -- local root directory for the download tree.
    """

    ## determine remote url
    data_product_url = file.get_download_url()

    ## determine local download location
    download_path = download_root / PurePosixPath(
        get_path(file, truncate_prefix=1))

    print(f"download to: {download_path}")

    if not os.path.exists(download_path.parent):
        os.makedirs(download_path.parent)

    ## check if file is already completely downloaded:
    c = pycurl.Curl()
    c.setopt(c.URL, data_product_url)
    c.setopt(c.NOBODY, 1)  # get headers only
    try:
        c.perform()
        remote_filesize = c.getinfo(c.CONTENT_LENGTH_DOWNLOAD)
    finally:
        # BUG FIX: the HEAD-request handle was never closed (handle leak).
        c.close()

    if (os.path.exists(download_path)
            and os.path.getsize(download_path) == remote_filesize):
        print("\tSkipping file, already downloaded.")

    else:
        curl = pycurl.Curl()
        curl.setopt(pycurl.URL, data_product_url)
        curl.setopt(pycurl.FOLLOWLOCATION, 1)
        curl.setopt(pycurl.MAXREDIRS, 5)
        curl.setopt(pycurl.CONNECTTIMEOUT, 30)

        # Resume into an existing partial file when present.
        if os.path.exists(download_path):
            fp = open(download_path, 'ab')
            curl.setopt(pycurl.RESUME_FROM, os.path.getsize(download_path))
        else:
            fp = open(download_path, 'wb')

        bar = progressbar.DataTransferBar(max_value=remote_filesize)

        # Bytes already on disk; pycurl's progress callback only reports
        # bytes transferred in *this* session.
        initial_size = os.path.getsize(download_path)

        def progress(total, existing, upload_t, upload_d):
            downloaded = existing + initial_size
            bar.update(downloaded)

        curl.setopt(pycurl.NOPROGRESS, 0)
        curl.setopt(pycurl.PROGRESSFUNCTION, progress)
        curl.setopt(pycurl.WRITEDATA, fp)

        print("\tdownloading...")
        try:
            curl.perform()
        finally:
            # BUG FIX: release the handle, file, and bar even when
            # perform() raises (e.g. a network error mid-transfer), so a
            # retry can reopen and resume the partial file.
            curl.close()
            fp.close()
            bar.finish()

    print("")