def Conv_Recognize(img):
    # NOTE: loading the model on every call is slow; consider loading it once
    # at module level.
    conv = load_model("./models/conv_79.h5")

    # Resize the input image to 28x28 and add the channel dimension.
    x = imresize(img, (28, 28))
    x = np.reshape(x, (28, 28, 1))

    # Invert the colors.
    x = np.invert(x)

    # Brighten non-background pixels by 60%.
    for i in range(len(x)):
        for j in range(len(x[i])):
            if x[i][j] > 50:
                x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)

    # Normalize the values between -1 and 1.
    x = normalize(x)

    val = conv.predict(np.array([x]))

    # Return the class probabilities as percentages.
    return [round(v * 100, 2) for v in val[0]]
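# The per-pixel brighten loop above can be written as one vectorized numpy
# operation. A minimal sketch, assuming a uint8 image such as the one
# np.invert returns; `brighten` is a hypothetical helper, not part of the
# original code.
import numpy as np

def brighten(x, threshold=50, factor=0.60):
    """Brighten pixels above `threshold` by `factor`, clipping at 255."""
    out = x.astype(np.float32)
    mask = out > threshold
    out[mask] = np.minimum(255.0, out[mask] * (1.0 + factor))
    return out.astype(x.dtype)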
def ready():
    # NOTE: reloading both models on every request is expensive; in production
    # they should be loaded once at module import time.
    mlp = load_model("./models/mlp_94.h5")
    conv = load_model("./models/conv_95.5.h5")
    FRUITS = {0: "Apple", 1: "Banana", 2: "Grape", 3: "Pineapple"}

    if request.method == "GET":
        return render_template("index1.html")

    if request.method == "POST":
        # The payload is a data URL ("data:image/png;base64,..."); keep only
        # the base64 part.
        data = request.form["payload"].split(",")[1]
        net = request.form["net"]
        img = base64.b64decode(data)
        with open('temp.png', 'wb') as output:
            output.write(img)
        x = imread('temp.png', mode='L')

        # Resize the input image to 28x28.
        x = imresize(x, (28, 28))

        if net == "MLP":
            model = mlp
            # Invert the colors.
            x = np.invert(x)
            # Flatten the matrix.
            x = x.flatten()
            # Brighten non-background pixels by 60%.
            for i in range(len(x)):
                if x[i] > 50:
                    x[i] = min(255, x[i] + x[i] * 0.60)
        if net == "ConvNet":
            model = conv
            # Add the channel dimension.
            x = np.reshape(x, (28, 28, 1))
            # Invert the colors.
            x = np.invert(x)
            # Brighten non-background pixels by 60%.
            for i in range(len(x)):
                for j in range(len(x[i])):
                    if x[i][j] > 50:
                        x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)
            # Normalize the values between -1 and 1.
            x = normalize(x)

        val = model.predict(np.array([x]))
        pred = FRUITS[np.argmax(val)]
        classes = ["Apple", "Banana", "Grape", "Pineapple"]
        print(pred)
        print(list(val[0]))
        return render_template("index1.html",
                               preds=list(val[0]),
                               classes=json.dumps(classes),
                               chart=True,
                               putback=request.form["payload"],
                               net=net)
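# A hypothetical client call against this handler; the URL, port, and route
# path are assumptions, since the @app.route decorator is not part of this
# snippet.
import base64
import requests

with open("drawing.png", "rb") as f:
    payload = "data:image/png;base64," + base64.b64encode(f.read()).decode()

resp = requests.post("http://localhost:5000/",
                     data={"payload": payload, "net": "ConvNet"})
print(resp.status_code)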
def reshape(self):
    # Convert to grayscale and resize to the 28x28 input size.
    img = self.image.resize((28, 28)).convert('L')
    np_img = np.array(img)
    np_img = np.reshape(np_img, (28, 28, 1))

    # Invert the colors.
    np_img = np.invert(np_img)

    # Brighten non-background pixels by 80%.
    for i in range(len(np_img)):
        for j in range(len(np_img[i])):
            if np_img[i][j] > 50:
                np_img[i][j] = min(255, np_img[i][j] + np_img[i][j] * 0.80)

    # Normalize the values.
    np_img = normalize(np_img)
    return np_img
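# A minimal usage sketch for the method above, assuming the enclosing class
# keeps a PIL image in self.image; the class name and the `model` object are
# hypothetical, not taken from this snippet.
from PIL import Image
import numpy as np

class Drawing:
    def __init__(self, path):
        self.image = Image.open(path)

    # reshape() as defined above would live here.

# d = Drawing("sample.png")
# x = d.reshape()                       # (28, 28, 1) preprocessed array
# probs = model.predict(np.array([x]))  # `model` loaded elsewhere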
def ready():
    # Run inference inside the stored TensorFlow session/graph so the model
    # loaded at startup is visible from Flask's worker threads.
    with session.as_default():
        with session.graph.as_default():
            if request.method == "GET":
                return render_template("index1.html")

            if request.method == "POST":
                data = request.form["payload"].split(",")[1]
                input_type = request.form["type"]  # renamed from `type` to avoid shadowing the builtin
                gan = request.form["gan"]
                print(gan)
                net = "ConvNet"

                if input_type == "Canvas":
                    img = base64.b64decode(data)
                    with open('temp.png', 'wb') as output:
                        output.write(img)
                    x = imread('temp.png', mode='L')
                if input_type == "GAN":
                    x = imread('./static/{}.png'.format(gan), mode='L')

                x = imresize(x, (28, 28))

                # Debug: display the resized input.
                io.imshow(x)
                io.show()

                if net == "ConvNet":
                    model = conv
                    # Add the channel dimension.
                    x = np.reshape(x, (28, 28, 1))
                    # Invert the colors.
                    x = np.invert(x)
                    # Brighten non-background pixels by 60%.
                    for i in range(len(x)):
                        for j in range(len(x[i])):
                            if x[i][j] > 50:
                                x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)
                    # Normalize the values between -1 and 1.
                    x = normalize(x)

                val = model.predict(np.array([x]))
                pred = OBJECTS[np.argmax(val)]
                classes = ['airplane', 'wine bottle', 'butterfly', 'banana',
                           't-shirt', 'umbrella', 'grapes']
                print(pred)
                print(list(val[0]))
                return render_template("index1.html",
                                       preds=pred,
                                       classes=json.dumps(classes),
                                       chart=True,
                                       putback=request.form["payload"],
                                       net=net)
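# The session/graph context managers above assume the model was loaded once at
# startup and its session kept around. A common setup for that pattern with
# standalone Keras on TensorFlow 1.x is sketched below; the model path and the
# OBJECTS mapping are assumptions inferred from the `classes` list above, not
# taken from this snippet.
import tensorflow as tf
from keras import backend as K
from keras.models import load_model

session = tf.Session()
K.set_session(session)
# load_model builds the graph inside `session`'s default graph, so request
# handlers can later re-enter it via session.graph.as_default().
conv = load_model("./models/conv.h5")  # hypothetical path
OBJECTS = {0: 'airplane', 1: 'wine bottle', 2: 'butterfly', 3: 'banana',
           4: 't-shirt', 5: 'umbrella', 6: 'grapes'}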
def inference_wiener(args):
    workspace = args.workspace
    iteration = args.iteration  # renamed from `iter` to avoid shadowing the builtin
    stack_num = args.stack_num
    filename = args.filename
    mini_num = args.mini_num
    visualize = args.visualize
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)

    sample_rate = cfg.sample_rate
    fft_size = cfg.fft_size
    hop_size = cfg.hop_size
    window_type = cfg.window_type

    if window_type == 'hamming':
        window = np.hamming(fft_size)

    # Audio.
    audio_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/mixed_audios/spectrogram/test/0db"
    names = os.listdir(audio_dir)

    # Load one model per target source.
    target_type = ['speech', 'noise']
    model_dict = {}
    for e in target_type:
        n_freq = 257
        model = DNN(stack_num, n_freq)
        model_path = os.path.join(workspace, "models", filename, e,
                                  "md_%d_iters.tar" % iteration)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['state_dict'])

        # Move model to GPU.
        if cuda:
            model.cuda()
        model.eval()
        model_dict[e] = model

    # Load scaler (per-bin mean and std from training).
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    (mean_, std_) = cPickle.load(open(scalar_path, 'rb'))
    mean_ = move_data_to_gpu(mean_, cuda, volatile=True)
    std_ = move_data_to_gpu(std_, cuda, volatile=True)

    if mini_num > 0:
        n_every = len(names) // mini_num  # integer division; `/` breaks on Python 3
    else:
        n_every = 1

    out_wav_dir = os.path.join(workspace, "enh_wavs", filename)
    pp_data.create_folder(out_wav_dir)

    for (cnt, name) in enumerate(names):
        if cnt % n_every == 0:
            audio_path = os.path.join(audio_dir, name)
            (audio, _) = pp_data.read_audio(audio_path, sample_rate)
            audio = pp_data.normalize(audio)
            cmplx_sp = pp_data.calc_sp(audio, fft_size, hop_size, window)
            x = np.abs(cmplx_sp)

            # Process data: pad the borders, then stack neighboring frames.
            n_pad = (stack_num - 1) // 2
            x = pp_data.pad_with_border(x, n_pad)
            x = pp_data.mat_2d_to_3d(x, stack_num, hop=1)

            # Predict the magnitude spectrogram of each source.
            pred_dict = {}
            for e in target_type:
                pred = forward(model_dict[e], x, mean_, std_, cuda)
                pred = pred.data.cpu().numpy()
                pred_dict[e] = pred
            print(cnt, name)

            # Wiener filter: scale the mixture magnitude by the speech ratio mask.
            pred_mag_sp = pred_dict['speech'] / (
                pred_dict['speech'] + pred_dict['noise']) * np.abs(cmplx_sp)

            pred_cmplx_sp = stft.real_to_complex(pred_mag_sp, cmplx_sp)
            frames = stft.istft(pred_cmplx_sp)

            # Overlap-add, then trim to the original length.
            cola_constant = stft.get_cola_constant(hop_size, window)
            seq = stft.overlap_add(frames, hop_size, cola_constant)
            seq = seq[0:len(audio)]

            # Write out wav.
            out_wav_path = os.path.join(out_wav_dir, name)
            pp_data.write_audio(out_wav_path, seq, sample_rate)
            print("Write out wav to: %s" % out_wav_path)

            if visualize:
                fig, axs = plt.subplots(3, 1, sharex=True)
                axs[0].matshow(np.log(np.abs(cmplx_sp)).T, origin='lower',
                               aspect='auto', cmap='jet')
                axs[1].matshow(np.log(np.abs(pred_dict['speech'])).T,
                               origin='lower', aspect='auto', cmap='jet')
                axs[2].matshow(np.log(np.abs(pred_dict['noise'])).T,
                               origin='lower', aspect='auto', cmap='jet')
                plt.show()
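# The Wiener step above computes a magnitude ratio mask from the two model
# outputs. The same operation as a standalone sketch, with a small epsilon for
# numerical safety that the original omits; `wiener_filter_mag` is a
# hypothetical name.
import numpy as np

def wiener_filter_mag(speech_mag, noise_mag, mix_mag, eps=1e-8):
    """Scale the mixture magnitude by the estimated speech-to-total ratio."""
    mask = speech_mag / (speech_mag + noise_mag + eps)
    return mask * mix_mag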
def inference(args):
    workspace = args.workspace
    iteration = args.iteration  # renamed from `iter` to avoid shadowing the builtin
    stack_num = args.stack_num
    filename = args.filename
    mini_num = args.mini_num
    visualize = args.visualize
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)

    audio_type = 'speech'
    sample_rate = cfg.sample_rate
    fft_size = cfg.fft_size
    hop_size = cfg.hop_size
    window_type = cfg.window_type

    if window_type == 'hamming':
        window = np.hamming(fft_size)

    # Audio.
    audio_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/mixed_audios/spectrogram/test/0db"
    names = os.listdir(audio_dir)
    speech_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/timit_wavs/subtest"

    # Load model.
    model_path = os.path.join(workspace, "models", filename, audio_type,
                              "md_%d_iters.tar" % iteration)
    n_freq = 257
    model = DNN(stack_num, n_freq)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()
    model.eval()  # disable dropout/batch-norm updates, as in the Wiener variant

    # Load scaler (per-bin mean and std from training).
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    (mean_, std_) = cPickle.load(open(scalar_path, 'rb'))
    mean_ = move_data_to_gpu(mean_, cuda, volatile=True)
    std_ = move_data_to_gpu(std_, cuda, volatile=True)

    if mini_num > 0:
        n_every = len(names) // mini_num  # integer division; `/` breaks on Python 3
    else:
        n_every = 1

    out_wav_dir = os.path.join(workspace, "enh_wavs", filename)
    pp_data.create_folder(out_wav_dir)

    dft = pp_data.DFT(fft_size, cuda)

    for (cnt, name) in enumerate(names):
        if cnt % n_every == 0:
            audio_path = os.path.join(audio_dir, name)
            (audio0, _) = pp_data.read_audio(audio_path, sample_rate)
            audio = pp_data.normalize(audio0)

            # Enframe.
            frames = stft.enframe(audio, fft_size, hop_size)

            # Process data: pad the borders, then stack neighboring frames.
            n_pad = (stack_num - 1) // 2
            x = pp_data.pad_with_border(frames, n_pad)
            x = pp_data.mat_2d_to_3d(x, stack_num, hop=1)

            pred_frames = forward(model, x, mean_, std_, cuda)
            pred_frames = pred_frames.data.cpu().numpy()

            # Window the predicted frames, then overlap-add and trim.
            pred_frames *= window
            cola_constant = stft.get_cola_constant(hop_size, window)
            seq = stft.overlap_add(pred_frames, hop_size, cola_constant)
            seq = seq[0:len(audio)]

            # Write out wav.
            out_wav_path = os.path.join(out_wav_dir, name)
            pp_data.write_audio(out_wav_path, seq, sample_rate)
            print("Write out wav to: %s" % out_wav_path)

            if visualize:
                clean_audio_path = os.path.join(speech_dir,
                                                name.split('.')[0] + ".WAV")
                (clean_audio, _) = pp_data.read_audio(clean_audio_path, sample_rate)
                clean_audio = pp_data.normalize(clean_audio)
                clean_frames = stft.enframe(clean_audio, fft_size, hop_size)

                mix_sp = np.abs(np.fft.rfft(frames * window, norm='ortho'))
                # NOTE: pred_frames were already windowed above, so the window
                # is applied twice here (kept as in the original).
                enh_sp = np.abs(np.fft.rfft(pred_frames * window, norm='ortho'))
                clean_sp = np.abs(np.fft.rfft(clean_frames * window, norm='ortho'))

                # Plot a few mixture/clean/enhanced frames side by side.
                K = 10
                fig, axs = plt.subplots(K // 2, 2, sharex=True)
                for k in range(K):
                    axs[k // 2, k % 2].plot(frames[k + 100], color='y')
                    axs[k // 2, k % 2].plot(clean_frames[k + 100], color='r')
                    axs[k // 2, k % 2].plot(pred_frames[k + 100], color='b')
                plt.show()

                vmin = -5.
                vmax = 5.
                fig, axs = plt.subplots(3, 1, sharex=True)
                axs[0].matshow(np.log(mix_sp).T, origin='lower', aspect='auto',
                               cmap='jet', vmin=vmin, vmax=vmax)
                axs[1].matshow(np.log(clean_sp).T, origin='lower', aspect='auto',
                               cmap='jet', vmin=vmin, vmax=vmax)
                axs[2].matshow(np.log(enh_sp).T, origin='lower', aspect='auto',
                               cmap='jet', vmin=vmin, vmax=vmax)
                plt.show()
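# stft.overlap_add and stft.get_cola_constant come from the project's stft
# module, which is not shown here. A plausible minimal sketch, assuming the
# window satisfies the constant-overlap-add (COLA) condition at this hop size:
import numpy as np

def overlap_add(frames, hop_size, cola_constant):
    """Sum windowed frames at hop_size intervals, then undo the constant
    window gain."""
    n_frames, frame_size = frames.shape
    out = np.zeros((n_frames - 1) * hop_size + frame_size)
    for i in range(n_frames):
        out[i * hop_size : i * hop_size + frame_size] += frames[i]
    return out / cola_constant

def get_cola_constant(hop_size, window):
    """For a COLA window, copies shifted by hop_size sum to a constant;
    evaluate that sum at sample 0."""
    return np.sum(window[::hop_size])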
def ExecPy():
    retJson = {
        "predict_digit": "Err",
        "detect_img": "",
        "centering_img": "",
        "prob": {}
    }

    if request.method == 'POST':
        # Decode the posted base64 image and save it to disk.
        postImg = BytesIO(base64.urlsafe_b64decode(request.form['img']))
        postImg = Image.open(postImg)
        postImg.save("./temp.png")

        x = imread('temp.png', mode='L')

        # Resize the input image to 28x28 and add the channel dimension.
        x = imresize(x, (28, 28))
        model = conv
        x = np.reshape(x, (28, 28, 1))

        # Invert the colors.
        x = np.invert(x)

        # Brighten non-background pixels by 60%.
        for i in range(len(x)):
            for j in range(len(x[i])):
                if x[i][j] > 50:
                    x[i][j] = min(255, x[i][j] + x[i][j] * 0.60)

        # Normalize the values between -1 and 1.
        x = normalize(x)

        val = model.predict(np.array([x]))
        pred = FRUITS[np.argmax(val)]
        # Note: the key is named "predict_digit" but holds the fruit label.
        retJson["predict_digit"] = pred

        # Text-to-speech: synthesize "It is a <pred>" once and cache the mp3.
        from gtts import gTTS
        from os import path

        if path.exists('sounds/' + pred + '.mp3'):
            playsound('sounds/' + pred + '.mp3')
        else:
            mytext = 'It is a ' + pred
            language = 'en-in'
            myobj = gTTS(text=mytext, lang=language)
            myobj.save('sounds/' + pred + '.mp3')
            playsound('sounds/' + pred + '.mp3')

    return json.dumps(retJson)
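# The synthesize-once-then-cache logic above can be isolated into a reusable
# helper; `say` is a hypothetical name, not part of the original code.
import os
from gtts import gTTS
from playsound import playsound

def say(label, sound_dir='sounds', lang='en-in'):
    """Play a cached announcement for `label`, synthesizing it on first use."""
    mp3_path = os.path.join(sound_dir, label + '.mp3')
    if not os.path.exists(mp3_path):
        gTTS(text='It is a ' + label, lang=lang).save(mp3_path)
    playsound(mp3_path)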
def inference(args):
    workspace = args.workspace
    model_name = args.model_name
    stack_num = args.stack_num
    filename = args.filename
    mini_num = args.mini_num
    visualize = args.visualize
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)

    sample_rate = cfg.sample_rate
    fft_size = cfg.fft_size
    hop_size = cfg.hop_size
    window_type = cfg.window_type

    if window_type == 'hamming':
        window = np.hamming(fft_size)

    # Audio.
    audio_dir = "/vol/vssp/msos/qk/workspaces/speech_enhancement/mixed_audios/spectrogram/test/0db"
    names = os.listdir(audio_dir)

    # Load model.
    model_path = os.path.join(workspace, "models", filename, model_name)
    n_freq = 257
    model = DNN(stack_num, n_freq)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()
    model.eval()  # disable dropout/batch-norm updates, as in the Wiener variant

    # Load scaler (per-bin mean and std from training).
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    (mean_, std_) = cPickle.load(open(scalar_path, 'rb'))
    mean_ = move_data_to_gpu(mean_, cuda, volatile=True)
    std_ = move_data_to_gpu(std_, cuda, volatile=True)

    if mini_num > 0:
        n_every = len(names) // mini_num  # integer division; `/` breaks on Python 3
    else:
        n_every = 1

    for (cnt, name) in enumerate(names):
        if cnt % n_every == 0:
            audio_path = os.path.join(audio_dir, name)
            (audio, _) = pp_data.read_audio(audio_path, sample_rate)
            audio = pp_data.normalize(audio)
            sp = pp_data.calc_sp(audio, fft_size, hop_size, window)
            x = np.abs(sp)

            # Process data: pad the borders, then stack neighboring frames.
            n_pad = (stack_num - 1) // 2
            x = pp_data.pad_with_border(x, n_pad)
            x = pp_data.mat_2d_to_3d(x, stack_num, hop=1)

            output = forward(model, x, mean_, std_, cuda)
            output = output.data.cpu().numpy()
            print(output.shape)

            if visualize:
                fig, axs = plt.subplots(2, 1, sharex=True)
                axs[0].matshow(np.log(np.abs(sp)).T, origin='lower',
                               aspect='auto', cmap='jet')
                axs[1].matshow(np.log(np.abs(output)).T, origin='lower',
                               aspect='auto', cmap='jet')
                plt.show()
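# pp_data.mat_2d_to_3d is not shown here; it cuts the padded spectrogram into
# overlapping stacks of stack_num frames, one stack per original frame. A
# plausible sketch under that assumption:
import numpy as np

def mat_2d_to_3d(x, agg_num, hop=1):
    """Cut a 2-D array (n_frames, n_freq) into overlapping segments of shape
    (n_segments, agg_num, n_freq)."""
    n_frames = x.shape[0]
    segments = []
    i = 0
    while i + agg_num <= n_frames:
        segments.append(x[i:i + agg_num])
        i += hop
    return np.array(segments)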