Esempio n. 1
0
def specaug_fbank_test():
    """Run an fbank ``SPLayer`` with a ``spec_aug`` section and save a plot.

    Two test recordings (a wav file and a flac decoded through a pipe) are
    loaded, zero-padded to a common length and passed through the layer.
    The features of the second item are drawn with matplotlib and written
    to ``test.png``.
    """
    masking = {
        "freq_mask_num": 2,
        "freq_mask_width": 27,
        "time_mask_num": 2,
        "time_mask_width": 100,
    }
    conf = {
        "feature_type": "fbank",
        "sample_rate": 16000,
        "num_mel_bins": 80,
        "use_energy": False,
        "spec_aug": masking,
    }

    sources = (
        "file:testdata/100-121669-0000.wav",
        "pipe:flac -c -d -s testdata/103-1240-0005.flac |",
    )
    # Load everything first, then convert; the sample rates are not used.
    loaded = [utils.load_wave(source) for source in sources]
    signals = [torch.from_numpy(samples) for _, samples in loaded]

    lengths = [signal.shape[0] for signal in signals]
    print(lengths)

    # Zero-pad every signal up to the longest one in the batch.
    padded_waveforms = torch.zeros(len(signals), max(lengths))
    for row, signal in enumerate(signals):
        padded_waveforms[row, :lengths[row]] += signal

    layer = SPLayer(conf)
    features, _ = layer(padded_waveforms, lengths)

    # Select the 'Agg' backend before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot
    pyplot.imshow(features[1].numpy())
    pyplot.savefig("test.png")
Esempio n. 2
0
def get_dur(wav_dic):
    """Map every key of ``wav_dic`` to the duration of its audio.

    Args:
        wav_dic: mapping from a key to a source accepted by
            ``utils.load_wave``.

    Returns:
        A dict with the same keys; each value is the number of entries along
        the first axis of the loaded data divided by the sample rate.
    """
    def _duration(source):
        rate, samples = utils.load_wave(source)
        return samples.shape[0] / float(rate)

    return {key: _duration(path) for key, path in wav_dic.items()}
Esempio n. 3
0
 def __getitem__(self, idx):
     """Return the ``(data, label)`` pair stored at position ``idx``.

     For the "waveform" feature type the data is whatever ``load_wave``
     returns for the stored path. Otherwise the path is read with
     ``read_ark`` into a ``torch.FloatTensor``, which is additionally passed
     through ``F_Mel`` when ``self.mel`` is truthy.
     """
     wants_waveform = self.feature_type == "waveform"
     path, label = self.item[idx]

     if wants_waveform:
         return (load_wave(path), label)

     feature = torch.FloatTensor(read_ark(path))
     if not self.mel:
         return (feature, label)
     return (F_Mel(feature, audio_conf), label)
Esempio n. 4
0
def load_wave_batch(paths):
    """Load several waveforms and zero-pad them into a single batch tensor.

    Args:
        paths: iterable of sources accepted by ``utils.load_wave``.

    Returns:
        A tuple ``(padded_waveforms, lengths)``. ``padded_waveforms`` is
        created with ``torch.zeros`` and has shape
        ``(number of paths, longest waveform length)``; each row holds one
        waveform followed by zeros. ``lengths`` is an int64 tensor with the
        unpadded length of every waveform. With no paths, an empty ``(0, 0)``
        batch and an empty length tensor are returned.
    """
    waveforms = []
    for path in paths:
        # The sample rate returned by load_wave is not needed here.
        _, samples = utils.load_wave(path)
        waveforms.append(torch.from_numpy(samples))

    lengths = [waveform.shape[0] for waveform in waveforms]
    # default=0 keeps an empty batch from raising ValueError in max().
    max_length = max(lengths, default=0)

    padded_waveforms = torch.zeros(len(waveforms), max_length)
    for row, (waveform, length) in enumerate(zip(waveforms, lengths)):
        padded_waveforms[row, :length] += waveform

    # An explicit dtype gives int64 for an empty list as well.
    return padded_waveforms, torch.tensor(lengths, dtype=torch.long)
Esempio n. 5
0
def specaug_test():
    """Print the features of one batch from a plain and a ``spec_aug`` layer.

    Two ``SPLayer`` instances are built from configurations that differ only
    in the ``spec_aug`` section. Both receive the same zero-padded batch of
    two test recordings, and their outputs are printed one after the other.
    """
    featconf = {
        "feature_type": "fbank",
        "sample_rate": 16000,
        "num_mel_bins": 40,
        "use_energy": False,
    }
    # Same settings as featconf, plus the augmentation section.
    augconf = {
        **featconf,
        "spec_aug": {
            "freq_mask_width": 10,
            "freq_mask_num": 2,
            "time_mask_width": 100,
            "time_mask_num": 2,
        },
    }

    sources = (
        "file:testdata/100-121669-0000.wav",
        "pipe:flac -c -d -s testdata/103-1240-0005.flac |",
    )
    # Load both recordings before converting; the sample rates are unused.
    loaded = [utils.load_wave(source) for source in sources]
    signals = [torch.from_numpy(samples) for _, samples in loaded]
    lengths = [signal.shape[0] for signal in signals]

    # Zero-pad to the longest recording.
    padded_waveforms = torch.zeros(len(signals), max(lengths))
    for row, signal in enumerate(signals):
        padded_waveforms[row, :lengths[row]] += signal

    splayer = SPLayer(featconf)
    auglayer = SPLayer(augconf)
    plain_features, _ = splayer(padded_waveforms, lengths)
    augmented_features, _ = auglayer(padded_waveforms, lengths)

    print("Before augmentation")
    print(plain_features)
    print("After augmentation")
    print(augmented_features)
Esempio n. 6
0
def test_load_wave():
    """Time ``utils.load_wave`` on three kinds of source and compare results.

    An ark offset, a wav file and a flac pipe are each loaded while a
    ``utils.Timer`` measures the call. A second ark offset is then loaded
    without any check on its result. Finally the sample rates and the data
    from the pipe and the ark are asserted equal to those from the wav file.
    """
    pipe_source = "pipe:flac -c -d -s testdata/100-121669-0000.flac | "
    file_source = "file:testdata/100-121669-0000.wav"
    ark_source = "ark:/data1/Corpora/LibriSpeech/ark/train_960.ark:16"
    second_ark_source = "ark:/data1/Corpora/LibriSpeech/ark/train_960.ark:2591436"

    timer = utils.Timer()

    timer.tic()
    ark_rate, ark_data = utils.load_wave(ark_source)
    print("Load ark time: {}s".format(timer.toc()))

    timer.tic()
    file_rate, file_data = utils.load_wave(file_source)
    print("Load file time: {}s".format(timer.toc()))

    timer.tic()
    pipe_rate, pipe_data = utils.load_wave(pipe_source)
    print("Load flac pipe time: {}s".format(timer.toc()))

    print("Load ark2")
    # Only checks that loading succeeds and yields a pair.
    _, _ = utils.load_wave(second_ark_source)

    # The wav file is the reference for both other sources.
    assert pipe_rate == file_rate
    assert ark_rate == file_rate
    assert np.sum(pipe_data != file_data) == 0
    assert np.sum(ark_data != file_data) == 0
Esempio n. 7
0
def fbank_test():
    """Run an fbank ``SPLayer`` on two padded recordings and print the output.

    A wav file and a flac decoded through a pipe are loaded, zero-padded to a
    common length and passed through the layer together with their original
    lengths. The returned features and feature lengths are printed.
    """
    conf = {
        "feature_type": "fbank",
        "sample_rate": 16000,
        "num_mel_bins": 40,
        "use_energy": False,
    }

    wav_source = "file:testdata/100-121669-0000.wav"
    flac_source = "pipe:flac -c -d -s testdata/103-1240-0005.flac |"
    # The sample rates returned by load_wave are not used.
    _, wav_samples = utils.load_wave(wav_source)
    _, flac_samples = utils.load_wave(flac_source)

    batch_items = [torch.from_numpy(wav_samples), torch.from_numpy(flac_samples)]
    lengths = [item.shape[0] for item in batch_items]

    # Zero-pad to the longest recording.
    padded_waveforms = torch.zeros(len(batch_items), max(lengths))
    for row, item in enumerate(batch_items):
        padded_waveforms[row, :lengths[row]] += item

    layer = SPLayer(conf)
    features, feature_lengths = layer(padded_waveforms, lengths)
    print(features)
    print(feature_lengths)
Esempio n. 8
0
        def decode_file():
            """Load the received recording, decode it and save the decoded text.

            Reads ``output.wav`` from ``../receive`` through ``utils.load_wave``,
            demodulates it with ``FSK.demodulation``, decodes the packets with
            ``utils.decode_bluetooth_packet`` and writes the resulting text to
            ``../result.txt``. The elapsed time of the load step and of the
            decode step is printed in milliseconds, followed by the count and
            text returned by the decoder.
            """
            start = time.time_ns()
            get_wave = utils.load_wave(save_base='../receive', file_name='output.wav')
            end = time.time_ns()
            print('读取文件耗时:', (end - start) / 1e6, 'ms')
            # A two-dimensional wave is reduced to its first column.
            if len(get_wave.shape) == 2:
                get_wave = get_wave[:, 0]

            start = time.time_ns()
            packets = FSK.demodulation(utils.init_args(), get_wave)
            count, result = utils.decode_bluetooth_packet(utils.init_args(), packets)
            # The context manager closes the file even if the write fails.
            with open('../result.txt', 'w', encoding='utf-8') as f:
                f.write(result)
            end = time.time_ns()
            print('解码文本耗时:', (end - start) / 1e6, 'ms')
            print('蓝牙包成功解码数量:{}\n解码信息:{}\n'.format(count, result))
0
 def get_result(self):
     '''
     Description: parse the recording and show the transfer result.
     Parameters: none.
     Returns: nothing; the result is shown in a tkinter message box.
     '''
     # NOTE(review): this method reads both the bare name `args` and
     # `self.args`; confirm that a module-level `args` exists and that the
     # two are meant to be different objects.
     received = load_wave(save_base=args.save_base_receive,
                          file_name=self.save_place)

     # Keep a band of 500 on either side of the configured frequency.
     low_cut = self.args.frequency - 500
     high_cut = self.args.frequency + 500
     filtered = bandpass(received, self.args.framerate, low_cut, high_cut)

     get_seq = pulse_demodulation(filtered, args)
     print(get_seq)
     result = decode(get_seq)
     decode_original = decode(self.original_seq)

     # Compute the transfer rate and the packet loss rate.
     duration = len(received) / self.args.framerate
     speed = len(self.original_seq) / duration / 8
     expected_count = len(decode_original)
     # Clamp at zero so extra received items do not give a negative loss.
     packet_loss = max(0, (expected_count - len(result)) / expected_count)

     show_text = "原始结果:{}\n接收结果:{} \n传输速率:{:.2f}byte/s\n丢包率:{:.2f}%\n".format(
         decode_original, result, speed, 100 * packet_loss)
     tkinter.messagebox.showinfo('传输结果', show_text)
Esempio n. 10
0
    # Command-line options; the parser itself is created above this excerpt.
    parser.add_argument("--nchannels", type=int, default=1)
    parser.add_argument("--volume", type=float, default=1.0)
    parser.add_argument("--start_place", type=int, default=0)
    parser.add_argument("--pulse_length", type=float, default=0.01)
    parser.add_argument("--interval_0", type=float, default=0.01)
    parser.add_argument("--interval_1", type=float, default=0.02)
    parser.add_argument("--save_base", type=str, default='sound')
    parser.add_argument("--file_name", type=str, default='pulse.wav')
    parser.add_argument("--seq_len", type=int, default=100)
    args = parser.parse_args()

    # Generate a random sequence of the requested length.
    original_seq = generate_random_seq(args.seq_len)
    print("The original seq is:\n", original_seq)

    # Modulate the sequence and save the wave under save_base/file_name.
    # NOTE(review): args.framerate and args.sample_width are read here but
    # are not among the options visible in this excerpt; presumably they are
    # defined earlier.
    the_wave = pulse_modulation(original_seq, args)
    save_wave(the_wave,
              framerate=args.framerate,
              sample_width=args.sample_width,
              nchannels=args.nchannels,
              save_base=args.save_base,
              file_name=args.file_name)
    # Load the saved wave back and demodulate it.
    get_wave = load_wave(save_base=args.save_base, file_name=args.file_name)
    get_seq = pulse_demodulation(get_wave, args)
    print("The loaded seq is:\n", get_seq)

    # Report whether the recovered sequence matches the original.
    result = compare_seqs(original_seq, get_seq)
    if result:
        print("The original seq and the seq I get is identical, right!")
    else:
        print("The original seq and the seq I get is not identical, wrong!")
Esempio n. 11
0
def main():
    """Demodulate the wave loaded from ``_tmp_a_0_6535.wav``.

    The wave is loaded with ``utils.load_wave`` and passed, together with the
    result of ``utils.init_args()``, to ``FSK.demodulation``.
    """
    # NOTE(review): `packets` is not used within the lines visible here;
    # the function may continue beyond this excerpt.
    packets = FSK.demodulation(utils.init_args(),
                               utils.load_wave("", "_tmp_a_0_6535.wav"))