Example no. 1
def Conv_Recognize(img):
    conv = load_model("./models/conv_79.h5")

    # resize input image to 28x28
    x = imresize(img, (28, 28))

    x = np.expand_dims(x, axis=0)
    x = np.reshape(x, (28, 28, 1))
    # invert the colors
    x = np.invert(x)
    # brighten the image by 60%
    for i in range(len(x)):
        for j in range(len(x[i])):
            if x[i][j] > 50:
                x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)

    # normalize the values between -1 and 1
    x = normalize(x)
    val = conv.predict(np.array([x]))

    # return the percent values
    values = list(val[0])
    toReturn = []
    for i in values:
        toReturn.append(round((i * 100), 2))

    return toReturn
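
A minimal way to exercise the function above, assuming the same scipy.misc.imread import and the module-level normalize helper the example already relies on (the file name is a placeholder):

# Hypothetical usage: load a grayscale drawing and print the percentage
# assigned to each class by Conv_Recognize.
from scipy.misc import imread

img = imread("drawing.png", mode='L')      # placeholder input file
percentages = Conv_Recognize(img)
for class_index, pct in enumerate(percentages):
    print(class_index, pct)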
Example no. 2
def ready():

    mlp = load_model("./models/mlp_94.h5")
    conv = load_model("./models/conv_95.5.h5")
    FRUITS = {0: "Apple", 1: "Banana", 2: "Grape", 3: "Pineapple"}
    if request.method == "GET":
        return render_template("index1.html")
    if request.method == "POST":
        data = request.form["payload"].split(",")[1]
        net = request.form["net"]

        img = base64.b64decode(data)
        with open('temp.png', 'wb') as output:
            output.write(img)
        x = imread('temp.png', mode='L')
        # resize input image to 28x28
        x = imresize(x, (28, 28))

        if net == "MLP":
            model = mlp
            # invert the colors
            x = np.invert(x)
            # flatten the matrix
            x = x.flatten()

            # brighten the image a bit (by 60%)
            for i in range(len(x)):
                if x[i] > 50:
                    x[i] = min(255, x[i] + x[i] * 0.60)

        if net == "ConvNet":
            model = conv
            x = np.expand_dims(x, axis=0)
            x = np.reshape(x, (28, 28, 1))
            # invert the colors
            x = np.invert(x)
            # brighten the image by 60%
            for i in range(len(x)):
                for j in range(len(x[i])):
                    if x[i][j] > 50:
                        x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)

        # normalize the values between -1 and 1
        x = normalize(x)
        val = model.predict(np.array([x]))
        pred = FRUITS[np.argmax(val)]
        classes = ["Apple", "Banana", "Grape", "Pineapple"]
        print(pred)
        print(list(val[0]))
        return render_template("index1.html",
                               preds=list(val[0]),
                               classes=json.dumps(classes),
                               chart=True,
                               putback=request.form["payload"],
                               net=net)
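
The handler above expects a form field "payload" containing a data URL ("data:image/png;base64,...") and a "net" field naming the model. A rough client-side sketch, with a hypothetical route and port, could be:

# Hypothetical client for the view above: POST a PNG as a data URL in the
# "payload" field and select the network in the "net" field.
import base64
import requests

with open("drawing.png", "rb") as f:             # placeholder file name
    encoded = base64.b64encode(f.read()).decode("ascii")
payload = "data:image/png;base64," + encoded      # the view splits on ","

resp = requests.post("http://localhost:5000/",    # assumed route and port
                     data={"payload": payload, "net": "ConvNet"})
print(resp.status_code)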
Example no. 3
def reshape(self):
    # resize to 28x28 and convert to single-channel grayscale
    img = (self.image).resize((28, 28)).convert('L')
    np_img = np.array(img)
    np_img = np.expand_dims(np_img, axis=0)
    np_img = np.reshape(np_img, (28, 28, 1))
    # invert the colors
    np_img = np.invert(np_img)
    # brighten pixels above 50 by 80%
    for i in range(len(np_img)):
        for j in range(len(np_img[i])):
            if np_img[i][j] > 50:
                np_img[i][j] = min(255, np_img[i][j] + np_img[i][j] * 0.80)
    # normalize
    np_img = normalize(np_img)
    return np_img
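
The method above is shown without its class; given the .resize().convert('L') calls, self.image has to be a PIL image. A minimal sketch of such a wrapper (the class name and constructor are assumptions):

# Hypothetical surrounding class: self.image is a PIL image opened elsewhere.
from PIL import Image
import numpy as np

class Drawing:
    def __init__(self, path):
        self.image = Image.open(path)

    # the reshape(self) method shown above belongs here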
Example no. 4
def ready():
    with session.as_default():
        with session.graph.as_default():
            if request.method == "GET":
                return render_template("index1.html")
            if request.method == "POST":
                data = request.form["payload"].split(",")[1]
                type = request.form["type"]
                gan = request.form["gan"]
                print(gan)
                net = "ConvNet"
                if type == "Canvas":
                    img = base64.b64decode(data)
                    with open('temp.png', 'wb') as output:
                        output.write(img)
                    x = imread('temp.png', mode='L')
                if type == "GAN":
                    x = imread('./static/{}.png'.format(gan), mode='L')

                x = imresize(x, (28, 28))
                io.imshow(x)
                io.show()
                if net == "ConvNet":
                    model = conv
                    x = np.expand_dims(x, axis=0)
                    x = np.reshape(x, (28, 28, 1))
                    # invert the colors
                    x = np.invert(x)

                    # brighten the image by 60%
                    for i in range(len(x)):
                        for j in range(len(x[i])):
                            if x[i][j] > 50:
                                x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)

                # normalize the values between -1 and 1
                x = normalize(x)
                x = x.reshape(28, 28, 1)
                val = model.predict(np.array([x]))
                pred = OBJECTS[np.argmax(val)]
                classes = [
                    'airplane', 'wine bottle', 'butterfly', 'banana',
                    't-shirt', 'umbrella', 'grapes'
                ]
                print(pred)
                print(list(val[0]))
                return render_template("index1.html",
                                       preds=pred,
                                       classes=json.dumps(classes),
                                       chart=True,
                                       putback=request.form["payload"],
                                       net=net)
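
The view above depends on module-level globals that are not shown: session, conv, and OBJECTS. With Keras on a TensorFlow 1.x backend, a rough setup sketch is given below; the model path is a placeholder and the label order is an assumption taken from the classes list above.

# Rough sketch of the globals the view assumes: a shared TF 1.x session/graph
# created at import time, the ConvNet loaded into it, and the label map.
import tensorflow as tf
from keras import backend as K
from keras.models import load_model

session = tf.Session()
K.set_session(session)
with session.graph.as_default():
    conv = load_model("./models/conv.h5")        # placeholder model path

OBJECTS = {0: 'airplane', 1: 'wine bottle', 2: 'butterfly', 3: 'banana',
           4: 't-shirt', 5: 'umbrella', 6: 'grapes'}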
Example no. 5
def inference_wiener(args):
    workspace = args.workspace
    iter = args.iteration
    stack_num = args.stack_num
    filename = args.filename
    mini_num = args.mini_num
    visualize = args.visualize
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)

    sample_rate = cfg.sample_rate
    fft_size = cfg.fft_size
    hop_size = cfg.hop_size
    window_type = cfg.window_type

    if window_type == 'hamming':
        window = np.hamming(fft_size)

    # Audio
    audio_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/mixed_audios/spectrogram/test/0db"
    # audio_dir = "/user/HS229/qk00006/my_code2015.5-/python/pub_speech_enhancement/mixture2clean_dnn/workspace/mixed_audios/spectrogram/test/0db"
    names = os.listdir(audio_dir)

    # Load model.
    target_type = ['speech', 'noise']
    model_dict = {}
    for e in target_type:
        n_freq = 257
        model = DNN(stack_num, n_freq)
        model_path = os.path.join(workspace, "models", filename, e,
                                  "md_%d_iters.tar" % iter)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['state_dict'])

        # Move model to GPU.
        if cuda:
            model.cuda()
        model.eval()

        model_dict[e] = model

    # Load scalar
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    (mean_, std_) = cPickle.load(open(scalar_path, 'rb'))
    mean_ = move_data_to_gpu(mean_, cuda, volatile=True)
    std_ = move_data_to_gpu(std_, cuda, volatile=True)

    if mini_num > 0:
        n_every = len(names) // mini_num
    else:
        n_every = 1

    out_wav_dir = os.path.join(workspace, "enh_wavs", filename)
    pp_data.create_folder(out_wav_dir)

    for (cnt, name) in enumerate(names):
        if cnt % n_every == 0:
            audio_path = os.path.join(audio_dir, name)
            (audio, _) = pp_data.read_audio(audio_path, sample_rate)

            audio = pp_data.normalize(audio)
            cmplx_sp = pp_data.calc_sp(audio, fft_size, hop_size, window)
            x = np.abs(cmplx_sp)

            # Process data.
            n_pad = (stack_num - 1) // 2
            x = pp_data.pad_with_border(x, n_pad)
            x = pp_data.mat_2d_to_3d(x, stack_num, hop=1)

            # Predict.
            pred_dict = {}
            for e in target_type:
                pred = forward(model_dict[e], x, mean_, std_, cuda)
                pred = pred.data.cpu().numpy()
                pred_dict[e] = pred
            print(cnt, name)

            # Wiener filter.
            pred_mag_sp = pred_dict['speech'] / (
                pred_dict['speech'] + pred_dict['noise']) * np.abs(cmplx_sp)

            pred_cmplx_sp = stft.real_to_complex(pred_mag_sp, cmplx_sp)
            frames = stft.istft(pred_cmplx_sp)

            cola_constant = stft.get_cola_constant(hop_size, window)
            seq = stft.overlap_add(frames, hop_size, cola_constant)
            seq = seq[0:len(audio)]

            # Write out wav
            out_wav_path = os.path.join(out_wav_dir, name)
            pp_data.write_audio(out_wav_path, seq, sample_rate)
            print("Write out wav to: %s" % out_wav_path)

            if visualize:
                vmin = -5.
                vmax = 5.
                fig, axs = plt.subplots(3, 1, sharex=True)
                axs[0].matshow(np.log(np.abs(cmplx_sp)).T,
                               origin='lower',
                               aspect='auto',
                               cmap='jet')
                axs[1].matshow(np.log(np.abs(pred_dict['speech'])).T,
                               origin='lower',
                               aspect='auto',
                               cmap='jet')
                axs[2].matshow(np.log(np.abs(pred_dict['noise'])).T,
                               origin='lower',
                               aspect='auto',
                               cmap='jet')
                plt.show()
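
Everything the function needs comes in through args; an argparse wiring consistent with the attributes it reads (names taken from the code above, defaults are assumptions) might look like:

# Hypothetical command-line wiring for inference_wiener.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--workspace", type=str, required=True)
parser.add_argument("--iteration", type=int, required=True)
parser.add_argument("--stack_num", type=int, required=True)
parser.add_argument("--filename", type=str, required=True)
parser.add_argument("--mini_num", type=int, default=0)
parser.add_argument("--visualize", action="store_true")
parser.add_argument("--use_cuda", action="store_true")
args = parser.parse_args()
inference_wiener(args)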
Example no. 6
def inference(args):
    workspace = args.workspace
    iter = args.iteration
    stack_num = args.stack_num
    filename = args.filename
    mini_num = args.mini_num
    visualize = args.visualize
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)
    audio_type = 'speech'
    
    sample_rate = cfg.sample_rate
    fft_size = cfg.fft_size
    hop_size = cfg.hop_size
    window_type = cfg.window_type

    if window_type == 'hamming':
        window = np.hamming(fft_size)

    # Audio
    audio_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/mixed_audios/spectrogram/test/0db"
    # audio_dir = "/user/HS229/qk00006/my_code2015.5-/python/pub_speech_enhancement/mixture2clean_dnn/workspace/mixed_audios/spectrogram/test/0db"
    names = os.listdir(audio_dir)
    
    speech_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/timit_wavs/subtest"
    
    # Load model
    model_path = os.path.join(workspace, "models", filename, audio_type, "md_%d_iters.tar" % iter)
    n_freq = 257
    model = DNN(stack_num, n_freq)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])
    
    if cuda:
        model.cuda()
        
    # Load scalar
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    (mean_, std_) = cPickle.load(open(scalar_path, 'rb'))
    mean_ = move_data_to_gpu(mean_, cuda, volatile=True)
    std_ = move_data_to_gpu(std_, cuda, volatile=True)
    
    if mini_num > 0:
        n_every = len(names) // mini_num
    else:
        n_every = 1
        
    out_wav_dir = os.path.join(workspace, "enh_wavs", filename)
    pp_data.create_folder(out_wav_dir)
    
    dft = pp_data.DFT(fft_size, cuda)
        
    for (cnt, name) in enumerate(names):
        if cnt % n_every == 0:
            audio_path = os.path.join(audio_dir, name)
            (audio0, _) = pp_data.read_audio(audio_path, sample_rate)
            
            audio = pp_data.normalize(audio0)
            
            # Enframe
            frames = stft.enframe(audio, fft_size, hop_size)
            
            # Process data. 
            n_pad = (stack_num - 1) // 2
            x = pp_data.pad_with_border(frames, n_pad)
            x = pp_data.mat_2d_to_3d(x, stack_num, hop=1)
            
            pred_frames = forward(model, x, mean_, std_, cuda)
            
            pred_frames = pred_frames.data.cpu().numpy()
            
            # cola_constant = 0.5
            # seq = stft.overlap_add(pred_frames, hop_size, cola_constant)
            
            pred_frames *= window
            
            cola_constant = stft.get_cola_constant(hop_size, window)
            seq = stft.overlap_add(pred_frames, hop_size, cola_constant)
            seq = seq[0 : len(audio)]
            
            
            # Write out wav
            out_wav_path = os.path.join(out_wav_dir, name)
            pp_data.write_audio(out_wav_path, seq, sample_rate)
            print("Write out wav to: %s" % out_wav_path)
            
            if visualize:
                
                clean_audio_path = os.path.join(speech_dir, name.split('.')[0] + ".WAV")
                (clean_audio, _) = pp_data.read_audio(clean_audio_path, sample_rate)
                clean_audio = pp_data.normalize(clean_audio)
                clean_frames = stft.enframe(clean_audio, fft_size, hop_size)
                
                mix_sp = np.abs(np.fft.rfft(frames * window, norm='ortho'))
                enh_sp = np.abs(np.fft.rfft(pred_frames * window, norm='ortho'))
                clean_sp = np.abs(np.fft.rfft(clean_frames * window, norm='ortho'))
                
                K = 10
                fig, axs = plt.subplots(K // 2, 2, sharex=True)
                for k in range(K):
                    axs[k // 2, k % 2].plot(frames[k+100], color='y')
                    axs[k // 2, k % 2].plot(clean_frames[k+100], color='r')
                    axs[k // 2, k % 2].plot(pred_frames[k+100], color='b')
                plt.show()
                
                
                vmin = -5.
                vmax = 5.
                fig, axs = plt.subplots(3,1, sharex=True)
                axs[0].matshow(np.log(np.abs(mix_sp)).T, origin='lower', aspect='auto', cmap='jet', vmin=vmin, vmax=vmax)
                axs[1].matshow(np.log(np.abs(clean_sp)).T, origin='lower', aspect='auto', cmap='jet', vmin=vmin, vmax=vmax)
                axs[2].matshow(np.log(np.abs(enh_sp)).T, origin='lower', aspect='auto', cmap='jet', vmin=vmin, vmax=vmax)
                plt.show()
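
For intuition, the reconstruction step above (stft.get_cola_constant followed by stft.overlap_add) can be approximated in plain NumPy as below; this is an illustrative sketch, not the project's stft module:

# Rough NumPy sketch of constant-overlap-add (COLA) reconstruction: sum the
# frames at hop-size offsets, then divide out the constant that the shifted
# windows sum to when the window/hop pair satisfies COLA.
import numpy as np

def overlap_add_sketch(frames, hop_size, window):
    n_frames, frame_size = frames.shape
    out = np.zeros((n_frames - 1) * hop_size + frame_size)
    for i in range(n_frames):
        out[i * hop_size: i * hop_size + frame_size] += frames[i]
    cola_constant = np.sum(window[::hop_size])   # sum of hop-shifted window samples
    return out / cola_constant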
Example no. 7
def ExecPy():
    retJson = {
        "predict_digit": "Err",
        "detect_img": "",
        "centering_img": "",
        "prob": {}
    }
    if request.method == 'POST':
        # request.body
        postImg = BytesIO(base64.urlsafe_b64decode(request.form['img']))
        postImg = Image.open(postImg)
        postImg.save("./temp.png")
        #with open('temp.png', 'wb') as output:
        #output.write(img)
        x = imread('temp.png', mode='L')
        # resize input image to 28x28
        x = imresize(x, (28, 28))
        # brighten the image a bit (by 60%)
        #for i in range(len(x)):
        #if x[i] > 50:
        #x[i] = min(255, x[i] + x[i] * 0.60)

        model = conv
        x = np.expand_dims(x, axis=0)
        x = np.reshape(x, (28, 28, 1))
        # invert the colors
        x = np.invert(x)
        # brighten the image by 60%
        for i in range(len(x)):
            for j in range(len(x[i])):
                if x[i][j] > 50:
                    x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)

        # normalize the values between -1 and 1
        x = normalize(x)
        val = model.predict(np.array([x]))
        pred = FRUITS[np.argmax(val)]
        retJson["predict_digit"] = pred
        # Import the required module for text
        # to speech conversion
        from gtts import gTTS

        # This module is imported so that we can
        # play the converted audio
        import os
        import os.path
        from os import path
        if path.exists('sounds/' + pred + '.mp3'):
            playsound('sounds/' + pred + '.mp3')
        else:

            # The text that you want to convert to audio
            mytext = 'It is a ' + pred

            # Language in which you want to convert
            language = 'en-in'

            # Pass the text and language to the engine;
            # slow=False tells the module to generate the
            # audio at normal (rather than slowed) speed
            myobj = gTTS(text=mytext, lang=language, slow=False)

            # Saving the converted audio in a mp3 file named
            # welcome
            myobj.save('sounds/' + pred + '.mp3')
            playsound('sounds/' + pred + '.mp3')

        # Playing the converted file
        # Load the popular external library

        #os.system("mpg321 welcome.mp3")

    return json.dumps(retJson)
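
Since the endpoint decodes request.form['img'] with urlsafe_b64decode and returns a JSON string, a rough client sketch (route and file name are hypothetical) would be:

# Hypothetical client for ExecPy: send a urlsafe-base64 PNG in the "img"
# field and read the predicted label from the JSON response.
import base64
import json
import requests

with open("fruit.png", "rb") as f:                        # placeholder file name
    img_b64 = base64.urlsafe_b64encode(f.read()).decode("ascii")

resp = requests.post("http://localhost:5000/api",          # assumed route
                     data={"img": img_b64})
print(json.loads(resp.text)["predict_digit"])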
Example no. 8
File: tmp01.py Project: zqy1/sednn
def inference(args):
    workspace = args.workspace
    model_name = args.model_name
    stack_num = args.stack_num
    filename = args.filename
    mini_num = args.mini_num
    visualize = args.visualize
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)

    sample_rate = cfg.sample_rate
    fft_size = cfg.fft_size
    hop_size = cfg.hop_size
    window_type = cfg.window_type

    if window_type == 'hamming':
        window = np.hamming(fft_size)

    # Audio
    audio_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/mixed_audios/spectrogram/test/0db"
    names = os.listdir(audio_dir)

    # Load model
    model_path = os.path.join(workspace, "models", filename, model_name)
    n_freq = 257
    model = DNN(stack_num, n_freq)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Load scalar
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    (mean_, std_) = cPickle.load(open(scalar_path, 'rb'))
    mean_ = move_data_to_gpu(mean_, cuda, volatile=True)
    std_ = move_data_to_gpu(std_, cuda, volatile=True)

    if mini_num > 0:
        n_every = len(names) // mini_num
    else:
        n_every = 1

    for (cnt, name) in enumerate(names):
        if cnt % n_every == 0:
            audio_path = os.path.join(audio_dir, name)
            (audio, _) = pp_data.read_audio(audio_path, sample_rate)

            audio = pp_data.normalize(audio)
            sp = pp_data.calc_sp(audio, fft_size, hop_size, window)
            x = np.abs(sp)

            # Process data.
            n_pad = (stack_num - 1) // 2
            x = pp_data.pad_with_border(x, n_pad)
            x = pp_data.mat_2d_to_3d(x, stack_num, hop=1)

            output = forward(model, x, mean_, std_, cuda)
            output = output.data.cpu().numpy()

            print(output.shape)
            if visualize:
                fig, axs = plt.subplots(2, 1, sharex=True)
                axs[0].matshow(np.log(np.abs(sp)).T,
                               origin='lower',
                               aspect='auto',
                               cmap='jet')
                axs[1].matshow(np.log(np.abs(output)).T,
                               origin='lower',
                               aspect='auto',
                               cmap='jet')
                plt.show()

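
forward() itself is not shown; given that mean_ and std_ are moved to the GPU as volatile variables, a rough sketch of what it presumably does (legacy, pre-0.4 PyTorch style) is:

# Illustrative sketch only, not the project's forward(): standardize the
# stacked frames with the loaded scaler, move them to the GPU if requested,
# and run the DNN in (legacy) volatile inference mode.
import torch
from torch.autograd import Variable

def forward_sketch(model, x, mean_, std_, cuda):
    x = torch.Tensor(x)
    if cuda:
        x = x.cuda()
    x = Variable(x, volatile=True)
    x = (x - mean_) / std_
    return model(x)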