コード例 #1
0
def get_cross_talk(database_path, dataset, json_path):
    """Write one speaker-activity / cross-talk JSON file per session.

    Every ``*.json`` transcription file found in
    ``database_path / 'CHiME5' / 'transcriptions' / dataset`` is loaded, each
    of its entries is processed with ``get_dict_speaker`` in a thread pool,
    the per-speaker results are merged with ``combine_dicts``, a
    ``'cross_talk'`` entry is added via ``get_cross_talk_per_mic`` and the
    result is dumped to ``json_path / (session_id + '.json')``.

    Args:
        database_path: Path object (must support ``/`` and ``glob``) of the
            directory that contains the ``CHiME5`` folder.
        dataset: Name of the sub-directory below ``transcriptions``; also
            used as prefix of the progress-bar description.
        json_path: Path object (must support ``/``) of the directory the
            per-session files are written to.

    Returns:
        None. Results are only written to disk.
    """
    # Imported once per call instead of once per session file.
    from concurrent.futures import ThreadPoolExecutor

    transcription_dir = database_path / 'CHiME5' / 'transcriptions' / dataset
    for session_path in transcription_dir.glob('*.json'):
        trans = load_json(session_path)
        session_id = trans[0]['session_id']
        # The speaker ids are derived from the first entry only: every key
        # of its 'start_time' mapping that contains a 'P'.
        speaker_ids = [key for key in trans[0]['start_time'] if 'P' in key]
        # Two-level dict keyed by speaker id on both levels; every leaf
        # starts with empty 'start' and 'end' lists.
        out_dict = {
            speaker_id: {
                other_id: dict(start=[], end=[]) for other_id in speaker_ids
            }
            for speaker_id in speaker_ids
        }
        with ThreadPoolExecutor(os.cpu_count()) as ex:
            results = tqdm.tqdm(
                ex.map(get_dict_speaker, trans),
                total=len(trans),
                desc=dataset + '_' + session_id,
            )
            for speaker, example_dict in results:
                # Entries for which get_dict_speaker returned no dict are
                # ignored.
                if example_dict is not None:
                    out_dict[speaker] = combine_dicts(
                        example_dict, out_dict[speaker])
        out_dict['cross_talk'] = get_cross_talk_per_mic(out_dict)

        dump_json(out_dict, str(json_path / session_id) + '.json')
コード例 #2
0
def load_transciption_json(path, chime6):
    """Load a transcription JSON file and transform its content.

    The file at ``path`` is read with ``load_json`` and the result is handed,
    together with ``chime6``, to ``transform_transciption_list``.

    Raises:
        RuntimeError: If loading or transforming fails for any reason. The
            message names the offending file and the original exception is
            attached as the cause.
    """
    try:
        raw_transcription = load_json(path)
        transformed = transform_transciption_list(raw_transcription, chime6)
    except Exception as err:
        # Re-raise with the file name so the broken file can be identified.
        message = (
            'See above exception msg.\n'
            f'The problematic json file is {path}.'
        )
        raise RuntimeError(message) from err
    return transformed
コード例 #3
0
def get_active_speaker(start_sample,
                       end_sample,
                       session_id,
                       mic_id,
                       json_path=None,
                       speaker_json=None,
                       sample_step=1,
                       dtype=bool):
    """Collect cross-talk and activity arrays for every cross-talk key.

    The speaker information is either read from
    ``json_path / (session_id + '.json')`` or taken from ``speaker_json``.
    When both are given, the file referenced by ``json_path`` wins.

    Args:
        start_sample: Forwarded to ``to_numpy``.
        end_sample: Forwarded to ``to_numpy``.
        session_id: Name (without ``.json``) of the file below ``json_path``.
        mic_id: Key selecting the entry of ``speaker_json[key]`` used for the
            activity.
        json_path: Optional path object of the directory with the JSON files.
        speaker_json: Optional already loaded content of such a file.
        sample_step: Forwarded to ``to_numpy``.
        dtype: Forwarded to ``to_numpy``.

    Returns:
        A dict that maps each key of ``speaker_json['cross_talk']`` to a dict
        with the entries ``'cross_talk'`` and ``'activity'``, both produced by
        ``to_numpy``.

    Raises:
        ValueError: If neither ``json_path`` nor ``speaker_json`` is given.
    """
    if json_path is None and speaker_json is None:
        raise ValueError('Either json_path or speaker_json have to be defined')
    if json_path is not None:
        speaker_json = load_json(str(json_path / session_id) + '.json')

    return {
        name: dict(
            cross_talk=to_numpy(intervals,
                                start_sample,
                                end_sample,
                                sample_step=sample_step,
                                dtype=dtype),
            activity=to_numpy(speaker_json[name][mic_id], start_sample,
                              end_sample, sample_step, dtype),
        )
        for name, intervals in speaker_json['cross_talk'].items()
    }
コード例 #4
0
ファイル: create_json.py プロジェクト: vimalmanohar/pb_chime5
def load_transciption_json(path):
    """Load the JSON file at ``path`` and transform its content.

    The loaded data is passed through ``transform_transciption_list`` and the
    result of that call is returned.
    """
    raw_transcription = load_json(path)
    return transform_transciption_list(raw_transcription)