def create_audio_and_convert(output_dir, captcha_text):
    """Render ``captcha_text`` as audio and save its mel spectrogram as a JPEG.

    A WAV file named ``<captcha_text>.wav`` is written into the ``tempDir``
    folder under the current working directory (the folder is created if it
    is missing). The WAV is then loaded at its native sample rate, converted
    to a dB-scaled mel spectrogram and saved, without axes or frame, as
    ``<output_dir>/<captcha_text>.jpg``.

    Args:
        output_dir: Directory that receives the spectrogram image. It is not
            created here, so it must already exist.
        captcha_text: Characters to synthesise; also used as the base name of
            both the intermediate WAV and the output JPEG.
    """
    temp_path = os.path.join(os.getcwd(), tempDir)
    if not os.path.exists(temp_path):
        print("Creating temp directory " + tempDir)
        # exist_ok guards against another process creating it in between.
        os.makedirs(temp_path, exist_ok=True)

    wav_audio_path = os.path.join(temp_path, captcha_text + '.wav')

    # NOTE(review): assumes AudioCaptcha is captcha.audio.AudioCaptcha, whose
    # write(chars, output) takes a *file* path and which has no export()
    # method (export() belongs to pydub's AudioSegment, used by an earlier
    # implementation). The WAV is therefore written directly to its final
    # temp location instead of write(output_dir) followed by export().
    audio = AudioCaptcha()
    audio.write(captcha_text, wav_audio_path)

    plt.interactive(False)
    # sr=None keeps the file's own sample rate instead of resampling.
    clip, sample_rate = librosa.load(wav_audio_path, sr=None)

    # 0.415 x 0.210 inches at dpi=400 is a 166 x 84 pixel canvas (before the
    # tight bounding-box crop applied by savefig below).
    fig = plt.figure(figsize=[0.415, 0.210])
    try:
        ax = fig.add_subplot(111)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)

        mel = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        # Draws onto the current axes, i.e. the subplot created above.
        librosa.display.specshow(librosa.power_to_db(mel, ref=np.max))

        filename = os.path.join(output_dir, captcha_text + '.jpg')
        fig.savefig(filename, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when plotting or saving fails, so
        # repeated calls do not accumulate open figures.
        fig.clf()
        plt.close(fig)
        plt.close('all')