Example 1
import os

import numpy as np

import cis  # project-local wav I/O helpers, imported the same way in Examples 6 and 7
# The project-local dircount, speaker_recognition and FacialRecog helpers are
# used below, but their imports are not part of this snippet.


def main():
    n_people = 3
    grouppath = "./Dropbox/group/"
    ngroup = dircount(grouppath)

    SR = speaker_recognition(
        n_people, os.path.join(grouppath, "group{}".format(ngroup - 1)))
    index = SR.transform()

    fname = os.path.join(
        grouppath, "group{}/target/aux_{}.wav".format(ngroup - 1,
                                                      int(index[0])))

    # Load the identified target speaker's track first; the others are stacked on.
    afps, data = cis.wavread(fname)
    x = np.array(data)
    # Stack the remaining separated tracks in speaker order.
    for i in range(1, n_people):
        fname = os.path.join(
            grouppath,
            "group{}/target/ilrma_{}.wav".format(ngroup - 1, int(index[i])))
        _, data = cis.wavread(fname)
        x = np.vstack([x, data.astype(float)])

    fname = os.path.join(grouppath,
                         "group{}/target/input.wav".format(ngroup - 1))
    _, data = cis.wavread(fname)

    sepvoice = x
    rawvoice = np.array(data, dtype=np.float32)

    vfname = "./Video/group/group{}/input.avi".format(ngroup - 1)
    outfile = "./Video/group/group{}/output".format(ngroup - 1)
    recog = FacialRecog(vfname, rawvoice[:, 0], sepvoice, afps, outfile)
    recog.main()
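
Both this example and Example 3 rely on a dircount helper that is not shown in either snippet. A minimal sketch of what it presumably does, assuming the recordings live in flat group0, group1, ... subdirectories (the implementation below is a guess, not the project's code):

import os

def dircount(path):
    # Hypothetical helper: number of subdirectories under `path`, so the
    # most recent session can be addressed as group{ngroup - 1}.
    return sum(os.path.isdir(os.path.join(path, name))
               for name in os.listdir(path))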
Example 2
    def generator_from_files(self, file_path1, file_path2, chunk_size=8000):
        """
        For testing 2-channel audio translation
        :param file_path1: wav file containing signal from the first microphone
        :param file_path2: wav file containing signal from the second microphone
        :param chunk_size: sequence size used for processing
        :return:y[0] -separated 1st channel, y[1] -2nd channel
        """
        rate1, data1 = cis.wavread(file_path1)
        rate2, data2 = cis.wavread(file_path2)
        if rate1 != rate2:
            raise ValueError('Sampling rates of the two input files do not match')
        fs = rate1

        for ind in range(0, data1.shape[0] - chunk_size, chunk_size):
            start_time = time.time()
            try:
                x = np.array(
                    [data1[ind:ind + chunk_size], data2[ind:ind + chunk_size]],
                    dtype=np.float32)
                y = AuxIVA(x, sample_freq=fs, beta=0.3, nchannel=2).auxiva()
                print("--- %s seconds ---" % (time.time() - start_time))
            except np.linalg.LinAlgError as err:
                if 'Singular matrix' in str(err):
                    # The demixing update failed for this chunk; yield a
                    # placeholder so the caller can detect the gap, then move
                    # on (otherwise the stale y below would be re-yielded).
                    print("Singular matrix")
                    yield np.array([0, 1])
                    continue
                else:
                    break

            yield y
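
A minimal way to drive this generator and stitch the separated chunks back into continuous channels could look as follows; `sep` stands for whichever object exposes generator_from_files, and the [0, 1] error placeholders yielded above are filtered out by their shape:

import numpy as np

chunks = [y for y in sep.generator_from_files('./data/rssd_A.wav',
                                              './data/rssd_B.wav')
          if y.shape != (2,)]   # drop the singular-matrix placeholders
separated = np.hstack(chunks)   # (2, total_samples): one row per source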
Example 3
import os

import numpy as np

import cis
from ILRMA import ILRMA  # assumed import path, by analogy with AuxIVA/IVA in Examples 6 and 7
# The project-local dircount helper is used below; its import is not part of this snippet.


def main():
    grouppath = "./Dropbox/group"
    ngroup = dircount(grouppath)
    dirname = os.path.join(grouppath, "group{}/target".format(ngroup-1))

    fs, data = cis.wavread(os.path.join(dirname, "input.wav"))
    x = np.array([data[:, 0], data[:, 1], data[:, 2]], dtype=np.float32)

    y = ILRMA(x, fs, 2, 200).ilrma()

    cis.wavwrite(os.path.join(dirname, "ilrma_0.wav"), fs, y[0])
    cis.wavwrite(os.path.join(dirname, "ilrma_1.wav"), fs, y[1])
    cis.wavwrite(os.path.join(dirname, "ilrma_2.wav"), fs, y[2])
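
Since cis.wavread returns multichannel data as (nsamples, nchannels), the explicit row stacking above is just a transpose. An equivalent sketch that also generalizes to any channel count, under the same assumptions as the example:

x = np.asarray(data, dtype=np.float32).T      # (nchannel, nsamples)
y = ILRMA(x, fs, 2, 200).ilrma()
for ch in range(y.shape[0]):
    cis.wavwrite(os.path.join(dirname, "ilrma_{}.wav".format(ch)), fs, y[ch])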
Example 4
def add_vibrato(file, fv, p=0.1):
    """
    Apply vibrato to a given audio source.

    file (str): path of the wav file to apply vibrato to
    fv (int): the pitch oscillates as a sine wave of frequency fv
    p (float): with a the original pitch and b the deviation, p = b/a,
               i.e. how far the pitch swings relative to the original
    """
    fs, y = cis.wavread(file)
    t = np.arange(0, y.shape[0]/fs, 1/fs)
    # Warped read positions: g(t) = t - p/(2*pi*fv) * cos(2*pi*fv*t),
    # so the playback rate is g'(t) = 1 + p*sin(2*pi*fv*t).
    g = t - p/(2*np.pi*fv)*np.cos(2*np.pi*fv*t)
    passing = np.argwhere(g > t[-1])
    if passing.size != 0:
        g = g[:passing[0, 0]]  # clip read positions that run past the signal
    mody = np.interp(g, t, y)
    cis.audioplay(mody, fs)
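
The warp g(t) has derivative 1 + p*sin(2*pi*fv*t), so the instantaneous pitch swings between (1-p) and (1+p) times the original. A self-contained check of the same warp on a synthesized tone, using only numpy (all values below are illustrative, not from the source):

import numpy as np

fs = 16000
t = np.arange(0, 1.0, 1/fs)
tone = np.sin(2*np.pi*440*t)                 # 440 Hz test tone

fv, p = 5, 0.1                               # 5 Hz vibrato, +/-10 % depth
g = t - p/(2*np.pi*fv)*np.cos(2*np.pi*fv*t)  # warped read positions
g = g[g <= t[-1]]                            # clip positions past the end
vibrato = np.interp(g, t, tone)              # resample along the warped axis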
Example 5
        # Extend the face box by half its height above and below, clamped
        # to the frame bounds.
        fy1 = max(0, y - h//2)
        fy2 = min(y + h + h//2, src.shape[0])

        # Dim the whole frame to a third of its brightness, then restore
        # the face region to (approximately) its original level.
        output = output // 3
        output[fy1:fy2, fx1:fx2, :] *= 3

        return output

    def __audio_concat(self):
        # Audio samples per video segment: the clip is split into M+2
        # segments of nframe // (M+2) video frames each, converted to
        # samples via the audio/video frame-rate ratio.
        sft = self.nframe // (self.M+2) * self.afps // self.vfps
        for m in range(self.M+2):
            if m < 1:
                # Leading segment: keep the raw mixture.
                output = self.rawvoice[sft*m:sft*(m+1)]
            elif m > self.M:
                # Trailing segment: raw mixture through to the end.
                output = np.hstack((output, self.rawvoice[sft*m:]))
            else:
                # Segment m carries speaker m-1's separated track.
                output = np.hstack((output, self.sepvoice[m-1, sft*m:sft*(m+1)]))
        return output


if __name__ == "__main__":
    rate1, data1 = cis.wavread('./samples/group/auxiva_2.wav')
    rate2, data2 = cis.wavread('./samples/group/auxiva_1.wav')
    rate3, data3 = cis.wavread('./samples/group/auxiva_0.wav')
    rate4, data4 = cis.wavread('./samples/group/output.wav')
    sepvoice = np.array([data1, data2, data3], dtype=np.float32)
    rawvoice = np.array(data4, dtype=np.float32)

    recog = FacialRecog("samples/WIN_20190515_18_14_38_Pro.mp4",
                        rawvoice[:, 0], sepvoice, rate1, "samples/aaa")
    recog.main()
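
The index arithmetic in __audio_concat keeps the spliced audio aligned with the video: the clip is cut into M+2 segments, the first and last keep the raw mixture, and segment m in between plays speaker m-1's separated track. A worked check of the segment size with illustrative numbers (these values are assumptions, not from the source):

nframe, M, afps, vfps = 900, 3, 48000, 30
sft = nframe // (M + 2) * afps // vfps
# 900 // 5 = 180 video frames per segment; 180 * 48000 // 30 = 288000
# audio samples, i.e. 6 s of audio for every 6 s segment of video.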
Example 6
import numpy as np
import cis
from AuxIVA import AuxIVA
import time

rate1, data1 = cis.wavread('./data/rssd_A.wav')
rate2, data2 = cis.wavread('./data/rssd_B.wav')
if rate1 != rate2:
    raise ValueError('Sampling rates of the two input files do not match')
fs = rate1
print(fs)
print(data1.shape)

start_time = time.time()

# First 10000 samples of each microphone, stacked as a (2, nsamples) array.
x = np.array([data1[:10000], data2[:10000]], dtype=np.float32)
y = AuxIVA(x, sample_freq=fs, beta=0.3, nchannel=2).auxiva()

print("--- %s seconds ---" % (time.time() - start_time))
Example 7
import numpy as np

import cis
from IVA import IVA

# Prepare the data.
rate1, data1 = cis.wavread('./samples/music1.wav')
rate2, data2 = cis.wavread('./samples/music2.wav')
rate3, data3 = cis.wavread('./samples/music3.wav')
if rate1 != rate2 or rate2 != rate3:
    raise ValueError('Sampling rates of the three files do not match')

fs = rate1
x = np.array([data1, data2, data3], dtype=np.float32)
y = IVA(x, fs).iva()

cis.wavwrite('./samples/iva_1_out.wav', fs, y[0])
cis.wavwrite('./samples/iva_2_out.wav', fs, y[1])
cis.wavwrite('./samples/iva_3_out.wav', fs, y[2])
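
One pitfall worth noting: np.array([data1, data2, data3]) only forms a proper (3, nsamples) matrix when all three files have exactly the same length; with mismatched lengths the construction fails or degrades to a ragged object array, depending on the NumPy version. A defensive variant, as a sketch:

n = min(len(data1), len(data2), len(data3))
x = np.array([data1[:n], data2[:n], data3[:n]], dtype=np.float32)
y = IVA(x, fs).iva()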