Beispiel #1
0
def find_data(ref, db):
    """Return the first entry of ``db`` whose ``"symbol"`` matches ``ref``.

    The symbol looked up is ``ref.parent.name``, i.e. the name of the
    parent of ``ref`` (for a path, the containing directory's name).

    Args:
        ref: Object exposing ``parent.name`` (for example a
            ``pathlib.Path``).
        db: Iterable of mappings, each providing a ``"symbol"`` key.

    Returns:
        The first matching entry, or an empty dict when nothing matches
        (a warning is logged in that case).
    """
    idx = ref.parent.name
    # Stop scanning at the first hit instead of collecting every match
    # just to throw all but the first away.
    match = next((entry for entry in db if entry["symbol"] == idx), None)
    if match is None:
        LOG.warning(f"WARNING: Reference {idx} not found")
        return {}
    return match
Beispiel #2
0
async def read_info_file(info_file, check=True, verbose=VERBOSE):
    """Asynchronously load a JSON info file and return its parsed content.

    Args:
        info_file: Path-like object with an ``exists()`` method; a falsy
            value short-circuits to an empty result.
        check: When true, a missing file is reported as an error.
        verbose: Verbosity level; values above 1 enable extra log lines.

    Returns:
        The decoded JSON document, or an empty dict when ``info_file`` is
        falsy or does not exist.
    """
    if not info_file:
        return {}

    # Handle the missing-file case first so the happy path stays flat.
    if not info_file.exists():
        if check:
            LOG.error(f"ERROR: No info found at {info_file}")
        if verbose > 1:
            LOG.warning(f"Info file: {info_file}\tDO NOT EXISTS!")
        return {}

    async with aiofiles.open(info_file, "r") as handle:
        raw = await handle.read()
        if verbose > 1:
            LOG.info(f"Info file read:{get_tabs('', prev=15)}{info_file}")
        return json.loads(raw)
Beispiel #3
0
def _process_audio_files(worker_id: int,
                         tasks: pd.DataFrame,
                         p_out: PosixPath,
                         p_raw: PosixPath,
                         n_samples: int = 59049,
                         sample_rate: int = 22050,
                         topk: int = 50,
                         file_pattern: str = 'clip-{}-seg-{}-of-{}') -> None:
    """Segment every audio clip listed in ``tasks`` and save the segments.

    For each row the audio at ``p_raw / row.mp3_path`` is loaded and cut
    into segments; every segment is written together with the row's labels
    as a compressed NumPy archive under ``p_out / row.split``. Clips that
    fail to load are logged and skipped. Progress is logged roughly every
    10% of the tasks.

    Args:
        worker_id: Identifier used only to tag log messages.
        tasks: One row per clip; the rows are read for ``split``,
            ``mp3_path`` and ``clip_id``, and the first ``topk`` columns
            are taken as the label vector.
        p_out: Root output directory; a sub-directory per split is used.
        p_raw: Root directory containing the raw audio files.
        n_samples: Segment length passed to ``_segment_audio``.
        sample_rate: Sample rate passed to ``_load_audio``.
        topk: Number of leading columns interpreted as labels.
        file_pattern: Format string receiving clip id, 1-based segment
            number and total segment count.
    """
    n_tasks = tasks.shape[0]
    t_start = time.time()
    # Clamp to 1: with fewer than 10 tasks the integer division yields 0
    # and the modulo in the progress report would raise ZeroDivisionError.
    n_parts = max(n_tasks // 10, 1)
    # Every row shares the frame's columns, so resolve the label columns once.
    label_columns = tasks.columns.tolist()[:topk]
    LOG.info(f"[Worker {worker_id:02d}]: Received {n_tasks} tasks.")

    for idx, (_, t) in enumerate(tasks.iterrows(), start=1):
        # find output dir
        out_dir = p_out.joinpath(t.split)

        # process audio file
        try:
            segments = _segment_audio(_load_audio(p_raw.joinpath(t.mp3_path), sample_rate=sample_rate),
                                      n_samples=n_samples,
                                      center=False)
        except (RuntimeError, EOFError):
            LOG.warning(f"[Worker {worker_id:02d}]: Failed load audio: {t.mp3_path}. Ignored.")
        else:
            # save label and segments to compressed archives
            labels = t[label_columns].values.astype(bool)
            n_segments = len(segments)
            for j, seg in enumerate(segments, start=1):
                target = out_dir.joinpath(file_pattern.format(t.clip_id, j, n_segments))
                np.savez_compressed(target.as_posix(), data=seg, labels=labels)

        # report progress (failed clips count as processed tasks too)
        if idx == n_tasks:
            LOG.info(f"[Worker {worker_id:02d}]: Job finished. Quit. (time usage: {(time.time() - t_start) / 60:.02f} min)")
        elif idx % n_parts == 0:
            # Derive the percentage from the real ratio so it never exceeds 100.
            LOG.info(f"[Worker {worker_id:02d}]: {idx * 100 // n_tasks}% tasks done. (time usage: {(time.time() - t_start) / 60:.02f} min)")