Python WatsonSTT Exemples, cli.stt.WatsonSTT Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : tests.py Projet : pratyushsingh97/Custom-Speech-to-Text-Modeling

def testing_adding_corpus2(mock):
    """``add_corpus`` must raise FileExistsError with the invalid-path message."""
    # Set up the injected mock before exercising the code under test.
    # NOTE(review): ``mock.exists()`` / ``mock.is_file()`` call the mock and
    # set ``return_value`` on the call result; whether this configures the
    # path checks as intended depends on what is patched -- confirm.
    mock.return_value = PosixPath('blah')
    mock.exists().return_value = True
    mock.is_file().return_value = False

    expected_message = "The path of the file is invalid"
    with pytest.raises(FileExistsError, match=expected_message):
        stt = WatsonSTT(url=url)
        stt.add_corpus("blah")

Exemple #2

0

Afficher le fichier

    def _model_keys(self) -> tuple:
        """Build the lookup structures used to present custom models.

        Every model gets a display label made of its name, description and
        creation value. Models are ordered by their 'created' value,
        descending.

        Args:
        None

        Returns:
        models_to_id: the dictionary that maps each display label (key) to
        the customization id of the model (value)

        model_name: list of ``{"name": label}`` dictionaries, in the same
        order, to present to the user
        """
        response = WatsonSTT.all_model_status(url=self.url, api_key=self.api_key)
        # The response may come without a 'customizations' entry (presumably
        # when no model exists yet); treat that as "no models" instead of
        # failing with a KeyError.
        customizations = response.get('customizations', [])
        ordered_models = sorted(customizations, key=itemgetter('created'), reverse=True)

        models_to_id = {}
        model_name = []

        for model in ordered_models:
            key = f"{model['name']} -- {model['description']} -- Created at: {model['created']}"
            model_name.append({"name": key})
            models_to_id[key] = model['customization_id']

        return models_to_id, model_name

Exemple #3

0

Afficher le fichier

Fichier : tests.py Projet : pratyushsingh97/Custom-Speech-to-Text-Modeling

def test_create_model(mock, error_codes):
    """Check ``create_model`` for a 201 response, with and without an id.

    Nothing is verified for any other value of ``error_codes``.
    """
    if error_codes != 201:
        return

    mock.return_value.status_code = 201

    # The response body carries a customization id: it must be returned.
    body_with_id = json.dumps({'customization_id': '1234'})
    mock.return_value.text = body_with_id

    stt = WatsonSTT(url=url)
    customization_id = stt.create_model(name="Testing Model",
                                        descr="From test")
    assert customization_id == '1234'

    # The response body has no 'customization_id': an exception is expected.
    body_without_id = json.dumps({'blah': 'blah'})
    mock.return_value.text = body_without_id

    failure_message = "The Watson STT request failed. Please try again."
    with pytest.raises(Exception, match=failure_message):
        WatsonSTT(url).create_model(name="Testing", descr="from test")

Exemple #4

0

Afficher le fichier

def clean_up(url, customization_ids):
    """Delete custom models of a Watson STT instance.

    The API key is read from ``keys/conf.ini``.

    Args:
    url: the url of the instance
    customization_ids: list of customization ids to delete. When the first
    item is 'all', every model of the account is deleted after an
    interactive confirmation. An empty list (or None) deletes nothing.

    Returns:
    None
    """
    # Nothing selected: bail out before indexing the list or touching the
    # configuration file.
    if not customization_ids:
        print("No customization ids were provided. Nothing to delete.")
        return

    config = ConfigParser()
    config.read('keys/conf.ini')
    api_key = config['API_KEY']['WATSON_STT_API']

    if customization_ids[0] != 'all':
        for customization_id in tqdm(customization_ids,
                                     desc="Deleting Customization Models",
                                     leave=False):
            deleted = WatsonSTT.delete_model(url,
                                             api_key,
                                             customization_id=customization_id)

            # stop at the first deletion that reports a falsy result
            if not deleted:
                return

        return

    confirmation = input(
        'Are you sure you want to delete all of the trained models? (y/N): '
    )
    confirmation = confirmation.strip().lower()

    if confirmation in ('y', 'yes'):
        models = WatsonSTT.all_model_status(url=url, api_key=api_key)
        if 'customizations' in models:
            for model in tqdm(models['customizations'],
                              desc="Deleting All Models",
                              leave=False):
                WatsonSTT.delete_model(url, api_key, model['customization_id'])
        else:
            print("No models to delete.")

    elif confirmation in ('n', 'no'):
        print("No models were deleted. Action cancelled.")
    else:
        print("Could not understand response.")

Exemple #5

0

Afficher le fichier

Fichier : main.py Projet : pratyushsingh97/Custom-Speech-to-Text-Modeling

def model_status(url, print=1) -> dict:
    """ A wrapper function that returns the status of models.
    This wrapper function is used when the --verbose flag is passed and
    when the user passes "latest" to evaluate the latest model.

    The API key is read from ``keys/conf.ini``.

    Args:
    url: the url of the instance
    print: flag; when truthy (the default) the models are pretty-printed to
    stdout. NOTE: the name shadows the builtin ``print`` inside this
    function; it is kept because callers pass it by keyword.

    Returns:
    models: the result of ``WatsonSTT.all_model_status``
    """
    config = ConfigParser()
    config.read('keys/conf.ini')
    api_key = config['API_KEY']['WATSON_STT_API']

    models = WatsonSTT.all_model_status(url=url, api_key=api_key)

    # flag is set by default to print the results to stdout
    if print:
        pprint(models)

    return models

Exemple #6

0

Afficher le fichier

    def runner(self):
        """ The runner parses the options selected and then calls
        the backend functions from WatsonSTT class.

        The URL and the API key are resolved first: an answer of "None" means
        the value is read from ``./keys/conf.ini``; any other answer is stored
        on the instance and handed to ``_save_url`` / ``_save_api_key``.
        Afterwards every selected option (Train, Update, Evaluate,
        See Available Models, Delete) is processed in turn.

        A KeyboardInterrupt raised anywhere in the session is caught and
        reported as "Action Cancelled".

        Raises:
        ValueError: if the URL or the API key must come from the
        configuration file and reading it fails
        """
        try:
            account_details = prompt(self.account_details(), style=custom_style_2)

            if account_details['watson_stt_url'] == "None":
                print()
                print("Attempting to read in url from configuration file")
                try:
                    path = Path('./keys/conf.ini').resolve()
                    config = ConfigParser()
                    config.read(path)
                    self.url = config['URL']['WATSON_STT_URL']

                    print("Succesfully read URL.")
                    print()

                # ``Exception`` rather than a bare ``except`` so that Ctrl-C
                # still reaches the KeyboardInterrupt handler below instead of
                # being converted into a ValueError.
                except Exception as err:
                    print("Uh oh! We failed to read the URL from the configuration file.")
                    raise ValueError("Failure to read URL from conf.ini file") from err

            else:
                self.url = account_details['watson_stt_url']
                self._save_url(self.url)

            if account_details['watson_stt_api_key'] == "None":
                print("Attempting to read in API key from configuration file")
                try:
                    path = Path('./keys/conf.ini').resolve()
                    config = ConfigParser()
                    config.read(path)
                    self.api_key = config['API_KEY']['WATSON_STT_API']

                    print("Succesfully read in API key.")
                    print()

                # see the note on the URL handler above
                except Exception as err:
                    print("Uh oh! We failed to read the API Key from the configuration file.")
                    raise ValueError("Failure to read API key from conf.ini file") from err

            else:
                api_key = account_details['watson_stt_api_key']
                self.api_key = api_key
                self._save_api_key(api_key)

            answers = prompt(self.main_questions(), style=custom_style_2)
            model_options = answers['custom_models_options']

            for model_option in model_options:

                if 'Train' in model_option:
                    # ask train questions
                    train = prompt(self.train_questions(), style=custom_style_2)

                    model_name = train['model_name']
                    model_descr = train['model_description']
                    oov_file_path = train['oov_file_path']

                    # failures are reported and the remaining options still run
                    try:
                        stt = WatsonSTT(url=self.url)
                        stt.create_model(name=model_name, descr=model_descr)
                        stt.add_corpus(oov_file_path)
                        stt.training()
                    except Exception as e:
                        print(e)

                if 'Update' in model_option:
                    update = prompt(self.update_questions(), style=custom_style_2)

                    model_customization_id = update['customization_id']
                    oov_file_path = update['oov_file_path']

                    try:
                        stt = WatsonSTT(url=self.url, customization_id=model_customization_id)
                        stt.add_corpus(corpus_path=oov_file_path)
                        stt.training()
                    except Exception as e:
                        print(e)

                if 'Evaluate' in model_option:
                    model_id, evaluate_answers = self.evaluate_questions()
                    evaluate_models = prompt(evaluate_answers, style=custom_style_2)

                    path_to_audio_file = evaluate_models['audio_file']
                    evaluate_models = evaluate_models['models_evaluate']

                    # resolve every selected label to its customization id
                    # before the first transcription starts
                    custom_ids = [model_id[eval_model] for eval_model in evaluate_models]

                    for model_label, custom_id in zip(evaluate_models, custom_ids):
                        stt = WatsonSTT(url=self.url, customization_id=custom_id)
                        try:
                            results = stt.transcribe(path_to_audio_file)

                            print()
                            print("*" * 60)
                            print(f"Transcription Results from {model_label}:")
                            pprint(results)
                            print()
                            print("*" * 60)
                            print()

                        except Exception as e:
                            print("*" * 60)
                            print()
                            print(f"Transcribing model {model_label} failed.")
                            print(e)
                            print("*" * 60)
                            print()

                if 'See Available Models' in model_option:
                    models = WatsonSTT.all_model_status(url=self.url, api_key=self.api_key)
                    pprint(models)

                # check if the model can be deleted
                # error of the model should be 409
                if 'Delete' in model_option:
                    delete_options = prompt(self.delete(), style=custom_style_2)
                    delete_options = delete_options['delete_all'].strip().lower()

                    if delete_options in ('y', 'yes'):
                        clean_up.clean_up(url=self.url, customization_ids=['all'])
                    elif delete_options in ('n', 'no'):
                        models_id, models_delete = self._delete_specific_models()
                        selected_models = prompt(models_delete, style=custom_style_2)

                        models_to_delete = selected_models['models_to_delete']
                        custom_ids_del_models = [models_id[del_model] for del_model in models_to_delete]

                        # delete the models
                        clean_up.clean_up(self.url, custom_ids_del_models)
                    else:
                        print("Only 'yes' and 'no' inputs allowed")
                        # abort the session through the handler below
                        raise KeyboardInterrupt

        except KeyboardInterrupt:
            print("Action Cancelled")

Exemple #7

0

Afficher le fichier

Fichier : tests.py Projet : pratyushsingh97/Custom-Speech-to-Text-Modeling

def test_training_no_customization_id():
    """``training`` on an instance built without a customization id must raise."""
    expected_message = "No customization id is provided!"
    with pytest.raises(ValueError, match=expected_message):
        stt = WatsonSTT(url)
        stt.training()

Exemple #8

0

Afficher le fichier

Fichier : tests.py Projet : pratyushsingh97/Custom-Speech-to-Text-Modeling

def test_invalid_create_model_params():
    """``create_model`` must raise a TypeError naming the offending parameter."""
    name_error = r".* 'name' .*"
    descr_error = r".* 'descr' .*"

    # invalid 'name', valid 'descr'
    with pytest.raises(TypeError, match=name_error):
        stt = WatsonSTT(url)
        stt.create_model(name=InvalidType(), descr="valid")

    # valid 'name', invalid 'descr'
    with pytest.raises(TypeError, match=descr_error):
        stt = WatsonSTT(url)
        stt.create_model(name="valid", descr=InvalidType())

Exemple #9

0

Afficher le fichier

Fichier : main.py Projet : pratyushsingh97/Custom-Speech-to-Text-Modeling

def main():
    """Entry point of the CLI.

    The program will accept either a "--visual" flag, where it then kicks out to the visual CLI.
    Otherwise, it will accept name, descr, url, oov_file_path, verbose, delete, eval, or audio_file flags.
    Passing certain combination of these flags will trigger different actions such as train, evaluate, or delete.

    Args:
    --visual: kick off the visual CLI. Control is handed over to VisualSTT and all other flags are ignored
    --url: the url of the instance
    --name: name of the model
    --descr: the description of the model
    --oov_file_path: the filepath of the grammar, vocabulary, or corpus used to train the model
    --eval: transcribe with a model (a customization id, or 'latest')
    --verbose: list out the models
    --delete: customization ids of the models to delete
    --audio_file: path to the audio file

    Returns:
    None

    Raises:
    Exception: if "--visual" is not set and no url is passed
    FileExistsError: if the audio file to evaluate is not an existing file
    """
    # setting up the command line
    # NOTE: argparse's default help formatter collapses runs of whitespace,
    # so the help texts are written as plain concatenated strings.
    argparser = argparse.ArgumentParser()

    argparser.add_argument(
        '--visual',
        help="Run CLI in visual mode. All other flags passed are ignored",
        action="store_true")
    argparser.add_argument('--name', help="Name of the model")
    argparser.add_argument('--descr',
                           help="A short description of the custom model")
    argparser.add_argument(
        '--url',
        help="This is the URL of the Watson STT model. "
        "Found on the start page of the Watson STT tooling.")
    argparser.add_argument(
        '--oov_file_path',
        help="The path of the out-of-vocabulary "
        "file (the corpus, words, or grammar)")
    argparser.add_argument(
        '-v',
        '--verbose',
        '--list_models',
        help="Shows you all of the models trained on this account",
        action="store_true")
    argparser.add_argument(
        '--delete',
        nargs='+',
        help="Pass the customization id of the models to delete")
    argparser.add_argument(
        '--eval',
        help="Evaluate the trained model against an audio-file. "
        "Pass in the 'customization_id' of the model or "
        "pass 'latest' to evaluate the latest trained model. "
        "The 'audio_file' flag must be set as well!")
    argparser.add_argument('--audio_file',
                           help="The path of the audio file to transcribe.")

    args = argparser.parse_args()

    name = args.name
    descr = args.descr
    url = args.url
    file_path = args.oov_file_path
    evaluate = args.eval
    audio_file = args.audio_file

    if args.visual:
        # the visual CLI takes over; every other flag is ignored
        VisualSTT().runner()
        return

    if url is None:
        raise Exception("Must pass URL")

    # kick off training
    if name and descr and url and file_path:
        custom_stt = WatsonSTT(url=url)
        custom_stt.create_model(name=name, descr=descr)
        custom_stt.add_corpus(file_path)
        custom_stt.training()

    # just add the corpus: a corpus file is given but no model to create
    # @TODO: how to create a model and train with an existing corpus?
    # @TODO: is this feature even neccesary?
    if url and file_path and name is None and descr is None:
        print("Adding corpus...")
        # @TODO: training a model with an existing uploaded corpus
        custom_stt = WatsonSTT(url=url)
        custom_stt.add_corpus(file_path)
        print("Finished adding corpus")

    # print out the models
    if url and args.verbose:
        print("Retrieving Models...")
        model_status(url)

    if url and evaluate and audio_file:
        # pass in customization id
        print("Checking audio file...")
        # is_file() is False for a missing path as well as for a directory
        if not Path(audio_file).is_file():
            raise FileExistsError("Cannot find audio file")

        if evaluate == "latest":
            models = model_status(url, print=0)
            models = models['customizations']

            if not models:
                print(
                    "You do not have any trained models. Please create and train a model before evaluating."
                )
                return

            # convert the date string into date object
            for model in models:
                model['created'] = _to_date(model['created'])

            # most recently created model
            latest_model = max(models, key=itemgetter('created'))
            evaluate = latest_model['customization_id']

        print("Transcribing the audio file...")
        custom_stt = WatsonSTT(url=url, customization_id=evaluate)
        results = custom_stt.transcribe(audio_file)
        print("Transcribing finished")
        print()
        pprint(results)

    if url and args.delete:
        clean_up.clean_up(url, args.delete)