Example #1
0
def get_abspath(path):
    """Convert <path> into an absolute path.

    MLHUB models run with a working directory of ~/.mlhub/<model>, so a
    relative <image-path> given to 'ml score facematch <image-path>' is
    relative to wherever the command was typed.  That directory is
    available via mlhub.utils.get_cmd_cwd(), which we join onto any
    non-absolute path before normalising the result.
    """

    expanded = os.path.expanduser(path)
    if os.path.isabs(expanded):
        return os.path.abspath(expanded)
    return os.path.abspath(os.path.join(get_cmd_cwd(), expanded))
Example #2
0
        # Some servers reject requests that lack a browser User-Agent.
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = urllib.request.Request(url, headers=headers)

        # Only analyse the image if the URL is actually reachable.
        if urllib.request.urlopen(req).status == 200:
            try:
                analysis = client.analyze_image_by_domain(
                    domain, url, language)
            except Exception as e:
                catch_exception(e, url)

    # Pairs with a 'try:' above this excerpt.
    except urllib.error.URLError:
        sys.exit("Error: The URL does not appear to exist. Please check.\n"
                 f"{url}")

# Not a URL: treat it as a local file, resolved relative to where the
# 'ml' command was typed (the model itself runs from ~/.mlhub/<model>).
else:
    path = os.path.join(get_cmd_cwd(), url)
    with open(path, 'rb') as fstream:
        try:
            analysis = client.analyze_image_by_domain_in_stream(
                domain, fstream, language)
        except Exception as e:
            catch_exception(e, path)

# Write results to stdout: one "confidence,name" line per landmark
# reported by the analysis.
for landmark in analysis.result["landmarks"]:
    print('{},{}'.format(
        round(landmark["confidence"], 2),
        landmark["name"],
    ))
Example #3
0
# -----------------------------------------------------------------------
# Set up a speech configuration.
# -----------------------------------------------------------------------

speech_config = speechsdk.SpeechConfig(subscription=key, region=location)

# Optional recognition language from the command line; the service
# default applies otherwise.
if args.lang:
    speech_config.speech_recognition_language = args.lang

# -----------------------------------------------------------------------
# Transcribe file or from microphone.
# -----------------------------------------------------------------------

if args.input:
    # Resolve the user-supplied path against the directory where the
    # 'ml' command was typed, not the model's own working directory.
    path = os.path.join(get_cmd_cwd(), args.input)
    if not os.path.exists(path):
        sys.exit(f"azspeech transcribe: File not found: {path}")

    # Opening with the wave module doubles as a cheap format check.
    try:
        w = wave.open(path)
    except Exception as e:
        sys.exit(
            f"azspeech transcribe: File does not seem to be wav audio: {path}")

    # Create a callback to terminate the transcription once the full
    # audio has been transcribed.

    done = False
Example #4
0
# Generate a thumbnail for <url>, which is either a web URL or a path
# to a local image file (relative paths resolve against the directory
# the 'ml' command was typed in).
height = 50

if is_url(url):
    try:
        # Some servers reject requests that lack a browser User-Agent.
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = urllib.request.Request(url, headers=headers)

        if urllib.request.urlopen(req).status == 200:
            try:
                analysis = client.generate_thumbnail(width, height, url)
            except Exception as e:
                catch_exception(e, url)

        # Save the thumbnail beside where the command was run, named
        # <original-basename>-thumbnail.<ext>.  Raw strings keep the
        # regex escapes valid (un-prefixed '\.' is a deprecated escape).
        sname = re.sub(r'\.(\w+)$', r'-thumbnail.\1',
                       os.path.basename(urlparse(url).path))
        sname = os.path.join(get_cmd_cwd(), sname)

    except urllib.error.URLError:
        # Newline separates the message from the echoed URL (matches
        # the style used elsewhere in this model's error messages).
        sys.exit("Error: The URL does not appear to exist. Please check.\n"
                 f"{url}")
else:
    path = os.path.join(get_cmd_cwd(), url)
    with open(path, 'rb') as fstream:
        try:
            analysis = client.generate_thumbnail_in_stream(
                width, height, fstream)
        except Exception as e:
            catch_exception(e, path)

    sname = re.sub(r'\.(\w+)$', r'-thumbnail.\1', path)
Example #5
0
def recognise(input_file, single_line, key):
    """Verify a speaker against freshly enrolled voice samples.

    Uses the Azure Speaker Recognition text-dependent verification REST
    API (westus endpoint): creates a verification profile, enrolls it
    with three sample recordings, verifies a fourth recording against
    it, prints the outcome, then deletes the profile.

    input_file: sequence of four wav paths, resolved relative to the
        directory where the 'ml' command was typed (get_cmd_cwd()).
        Elements 0-2 enroll the profile; element 3 is verified.
    single_line: if True print "result, score" on one line, otherwise
        result and score on separate lines.
    key: Azure Speech subscription key.

    Exits the process with a message on any service or file error.
    """
    # -----------------------------------------------------------------------
    # Create verification voice profile
    # -----------------------------------------------------------------------

    create_headers = {
        'Ocp-Apim-Subscription-Key': key,
        'Content-Type': 'application/json'
    }

    endpoint = 'https://westus.api.cognitive.microsoft.com/'
    url_path = 'speaker/verification/v2.0/text-dependent/profiles'
    create_profile_url = endpoint + url_path
    result = requests.post(create_profile_url,
                           data="{\"locale\":\"en-US\"}",
                           headers=create_headers)

    # -----------------------------------------------------------------------
    # Enroll the voice profile (3 samples)
    # -----------------------------------------------------------------------

    # If the location is not westus, profileId will not be found.
    try:
        profile_id = result.json()['profileId']
    except Exception:
        if result.json()['error']['code'] == '401':
            sys.exit(
                "The Azure Speech key is not correct, please run ml configure azspeech to update your key. "
            )
        else:
            error = result.json()['error']['message']
            sys.exit(f"Error: {error}")

    enroll_url = create_profile_url + "/" + profile_id + "/enrollments"
    enroll_header = {
        'Ocp-Apim-Subscription-Key': key,
        'Content-Type': 'audio/wav; codecs=audio/pcm; samplerate=16000'
    }

    # Add three sample audios
    for i in range(3):
        sample_path = os.path.join(get_cmd_cwd(), input_file[i])

        # Context manager guarantees the wav file is closed even if
        # reading the frames fails part-way.
        try:
            with wave.open(sample_path, "rb") as w:
                binary_data = w.readframes(w.getnframes())
        except FileNotFoundError:
            sys.exit(f"Error: wrong sample file location. \n{input_file[i]}")

        result = requests.post(enroll_url,
                               data=binary_data,
                               headers=enroll_header)

        # Catch the invalid audios: if the response carries an error
        # payload, report it and exit (the 'else' runs only when no
        # exception was raised, i.e. the error key was present).  A
        # missing 'error' key means the enrollment succeeded.
        try:
            if result.json()['error']['message']:
                error = result.json()['error']['message']
                print(f"The sample audio file {input_file[i]} error: {error}",
                      file=sys.stderr)
        except Exception:
            pass
        else:
            sys.exit(1)

    # -----------------------------------------------------------------------
    # Verify the audio
    # -----------------------------------------------------------------------

    verify_url = create_profile_url + "/" + profile_id + "/verify"
    verify_header = {
        'Ocp-Apim-Subscription-Key': key,
        'Content-Type': 'audio/wav; codecs=audio/pcm; samplerate=16000'
    }

    verify_path = os.path.join(get_cmd_cwd(), input_file[3])

    try:
        with wave.open(verify_path, "rb") as w:
            binary_data = w.readframes(w.getnframes())
    except FileNotFoundError:
        sys.exit(f"Error: wrong verification file location.\n{input_file[3]}")

    result = requests.post(verify_url, data=binary_data, headers=verify_header)

    # Report the verification outcome; any missing key means the
    # service returned an error payload instead.
    try:
        if not single_line:
            print("Result: " + result.json()['recognitionResult'])
            print("Score: " + str(result.json()['score']))
        else:
            print(result.json()['recognitionResult'] + ", " +
                  str(result.json()['score']))
    except Exception:
        error = result.json()['error']['message']
        sys.exit(f"Error: {error}")

    # -----------------------------------------------------------------------
    # Delete the voice profile
    # -----------------------------------------------------------------------

    # NOTE(review): the profile is only deleted on the success path; the
    # early sys.exit calls above leave orphaned profiles on the service.
    delete_url = create_profile_url + "/" + profile_id
    delete_header = {
        'Ocp-Apim-Subscription-Key': key,
    }
    requests.delete(delete_url, headers=delete_header)
Example #6
0
    'Ocp-Apim-Subscription-Key': key,
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json',
}

# Azure Translator Text API v3 endpoint; the target language is added
# as a query parameter per request.
endpoint = 'https://api.cognitive.microsofttranslator.com/'
path = '/translate?api-version=3.0'
translate_url = endpoint + path

# ----------------------------------------------------------------------
# Read the text to be translated.
# ----------------------------------------------------------------------

# Text comes either from a file named by --input (path relative to
# where the 'ml' command was typed) or from the words on the command
# line; otherwise it stays empty.
text = ""
if args.input:
    # Use a context manager so the input file is closed promptly
    # instead of being leaked until garbage collection.
    with open(os.path.join(get_cmd_cwd(), args.input), "r") as f:
        text = f.read()
elif args.sentence:
    text = " ".join(args.sentence)

# ----------------------------------------------------------------------
# Support function to translate the text.
# ----------------------------------------------------------------------


def translate(text, to):
    json = [{'text': text}]
    params = f'&to={to}'
    result = requests.post(translate_url + params, headers=headers, json=json)
    result = result.json()

    sys.stdout.write(f"{result[0]['detectedLanguage']['language']}," +
Example #7
0
        # Normalise to an absolute path (expanding '~') before scoring.
        url = os.path.abspath(os.path.expanduser(url))

        # Restore the previous working directory if it was changed to
        # the command's cwd earlier (pairs with an os.chdir above this
        # excerpt — TODO confirm).
        if CMD_CWD != '':
            os.chdir(oldwd)

        # A directory means colorize every image inside it; otherwise
        # treat the path as a single image file.
        if os.path.isdir(url):
            for img in os.listdir(url):
                img_file = os.path.join(url, '', img)
                _colorize_one_img(img_file)
        else:
            _colorize_one_img(url)


# The working dir of the command which invokes this script.

CMD_CWD = get_cmd_cwd()


# Setup input path completion so the interactive prompt below supports
# TAB-completion of filesystem paths.

readline.set_completer_delims('\t')
readline.parse_and_bind("tab: complete")
readline.set_completer(_tab_complete_path)

# Scoring

# With no paths given on the command line, prompt interactively;
# Ctrl-d (EOF) ends the prompt cleanly with a trailing newline.
if len(args.path) == 0:
    try:
        url = input("Path or URL of images to colorize (Quit by Ctrl-d):\n(You could try images in '~/.mlhub/colorize/images/')\n> ")
    except EOFError:
        print()
Example #8
0
    help='path to an audio file to save. The file type should be wav')

args = option_parser.parse_args()

# ----------------------------------------------------------------------
# Request subscription key and location from user.
# ----------------------------------------------------------------------
key, location = get_private()

# ----------------------------------------------------------------------
# Read the text to be translated.
# ----------------------------------------------------------------------

# Text comes either from a file named by --input (path relative to
# where the 'ml' command was typed) or from the words on the command
# line; otherwise it stays empty.
text = ""
if args.input:
    # Use a context manager so the input file is closed promptly
    # instead of being leaked until garbage collection.
    with open(os.path.join(get_cmd_cwd(), args.input), "r") as f:
        text = f.read()
elif args.sentence:
    text = " ".join(args.sentence)

# Split the text into a list of sentences. Each sentence is sent off
# for synthesis. This avoids a very long text going off to the
# synthesizer (which seems to have a limit of some 640 characters) and
# is a natural break point anyhow.

text = " ".join(text.splitlines())
text = text.replace(". ", "\n")
text = text.splitlines()

# -----------------------------------------------------------------------
# Set up a speech synthesizer using the default speaker as audio output.
#
Example #9
0
def _report_bad_language_and_exit(e):
    """Explain a failed translation lookup (bad language code) and exit."""
    print(f"Error:{e}.", file=sys.stderr)
    print(
        "Error: wrong original or target language code. For original language code, please choose one from "
        "Loale in Speech-to-text table"
        "(https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#speech"
        "-to-text).\n "
        "For target language code, please choose one from Language Code in Text languages table"
        "(https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#speech"
        "-translation)",
        file=sys.stderr)
    sys.exit(1)


def synthesize_translations(to_language, single_line, result, output, key,
                            region):
    """Print a recognized utterance plus its translation, then speak it.

    to_language: target language code (Text languages table).
    single_line: if True, print recognition and translation on one line.
    result: recognition result carrying .text and .translations.
    output: optional wav path (relative to the command's cwd) to save
        the synthesized speech to; plays on the default speaker if empty.
    key, region: Azure Speech subscription credentials.

    Exits the process on an unknown language code or a language with no
    neural voice.
    """
    if not single_line:
        print(f'Recognized: "{result.text}"')
        try:
            translation = result.translations[to_language]
        except Exception as e:
            _report_bad_language_and_exit(e)
        print(f'Translated into "{to_language}": {translation}')
    else:
        try:
            translation = result.translations[to_language]
        except Exception as e:
            _report_bad_language_and_exit(e)
        print(
            f'Recognized: "{result.text}". Translated into "{to_language}": {translation}'
        )

    # ----------------------------------------------------------------------
    # Get language code. "Original": is from Loale in Speech-to-text table.
    # (https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#speech-to-text)
    # "target": is from Language Code in Text languages table.
    # (https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#speech-translation)
    # Make sure the text language in Text Language table exists in Neural
    # voices table.
    # ----------------------------------------------------------------------

    # The voice table ships with the model, so it is read from the
    # model's own directory (os.getcwd()), not the command's cwd.
    language_to_voice_map = {}
    dataframe_speech = pandas.read_csv(os.path.join(os.getcwd(),
                                                    "data/text-to-speech.txt"),
                                       delimiter="\t")

    for index, row in dataframe_speech.iterrows():
        # NOTE(review): mapping zh-CN to "zh-Hant" (Traditional) looks
        # suspicious — zh-Hans is Simplified; confirm against the table.
        if row[1] == "zh-HK":
            language_code = "yue"
        elif row[1] == "zh-CN":
            language_code = "zh-Hant"
        else:
            language_code = row[1][0:2]

        language_to_voice_map[language_code] = row[3]

    speech_conf = speechsdk.SpeechConfig(subscription=key, region=region)

    # dict.get returns None for an unknown language, which the previous
    # bare try/except around the assignment could not reliably detect;
    # check explicitly so the intended error message is actually shown.
    voice = language_to_voice_map.get(to_language)
    if voice is None:
        print(
            f"Error: This target language ({to_language}) doesn't have speech.",
            file=sys.stderr)
        sys.exit(1)
    speech_conf.speech_synthesis_voice_name = voice

    if output:
        # Save the synthesized speech to a file relative to where the
        # user typed the command; otherwise use the default speaker.
        file_location = os.path.join(get_cmd_cwd(), output)
        audio_conf = speechsdk.audio.AudioOutputConfig(filename=file_location)
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_conf,
                                                  audio_config=audio_conf)
    else:
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_conf)
    # Block until synthesis completes.
    synthesizer.speak_text_async(translation).get()