예제 #1
0
    def __init__(self, scp, model, post, conf, output, log=False):
        """
        Set up the segmenter.

        Stores the constructor arguments on the instance, then builds an
        ``NnetSAD`` object and a ``SegmentationProcessor``.

        Args:
            scp: path to the .SCP file with the audio
            model: path to the .RAW file of the segmentation model
            post: path to the .VEC file of segmentation posterior probabilities
            conf: path to the .CONF segmentation configuration file
            output: path to the directory with the segmentation results
            log: logging flag
        """
        self.scp, self.model, self.post, self.conf = scp, model, post, conf
        self.output = Path(output)
        self.log = log

        # Load the network first, then derive the transform from the
        # averaged posteriors, then build the decoding graph.
        nnet = NnetSAD.read_model(model)
        transform = NnetSAD.make_sad_transform(
            NnetSAD.read_average_posteriors(post)
        )
        graph = NnetSAD.make_sad_graph()

        # Fixed decoding options applied to the computation-options object.
        opts = NnetSimpleComputationOptions()
        for option, value in (
            ("extra_left_context", 79),
            ("extra_right_context", 21),
            ("extra_left_context_initial", 0),
            ("extra_right_context_final", 0),
            ("frames_per_chunk", 150),
            ("acoustic_scale", 0.3),
        ):
            setattr(opts, option, value)

        self.sad = NnetSAD(nnet, transform, graph, decodable_opts=opts)
        self.seg = SegmentationProcessor([2])
예제 #2
0
# Load audio and SAD settings from the configuration file
parser = ConfigParser()
parser.read('sad_model.conf')
samplerate = parser.getint('AUDIO', 'SAMPLE_RATE')
n_channels = parser.getint('AUDIO', 'N_CHANNELS')
encoding = parser.get('AUDIO', 'ENCODING')
sad_final_raw = parser.get('SAD', 'FINAL_RAW')
post_output_vec = parser.get('SAD', 'POST_OUTPUT_VEC')

# Decoding options handed to the SAD object below
decodable_opts = NnetSimpleComputationOptions()
decodable_opts.acoustic_scale = 0.3
decodable_opts.frames_per_chunk = 150
decodable_opts.extra_left_context = 79
decodable_opts.extra_left_context_initial = 0
decodable_opts.extra_right_context = 21
decodable_opts.extra_right_context_final = 0

# Build the SAD pipeline: model -> averaged posteriors -> transform -> graph
model = NnetSAD.read_model(sad_final_raw)
post = NnetSAD.read_average_posteriors(post_output_vec)
transform = NnetSAD.make_sad_transform(post)
graph = NnetSAD.make_sad_graph(min_silence_duration=0.1)
sad = NnetSAD(
    model,
    transform,
    graph,
    decodable_opts=decodable_opts,
)

# Segment post-processor configured for label 2 with duration limits
seg = SegmentationProcessor(
    target_labels=[2],
    min_segment_dur=3,
    max_merged_segment_dur=3.84,
)


def convert_to_wav(path):
    '''
        1.  Converts audio file in other formats to wav format
        2.  Output directory is same as input file directory