def mock_model(tmpdir_factory):
    """Build, or reload, a Kaldi model inside a temporary ``pipeline`` directory.

    A temporary directory is requested from ``tmpdir_factory``. If it does not
    yet contain a ``state`` directory, a new ``KaldiInterface`` is created
    there, a dataset and pronunciation dictionary are prepared from the
    ``/recordings`` files, and a model linked to both is trained. Otherwise
    the existing interface is loaded and the model is requested with
    ``use_existing=True``.

    Args:
        tmpdir_factory: Object exposing ``mktemp(name)``; presumably pytest's
            ``tmpdir_factory`` fixture (confirm against the caller).

    Returns:
        A ``(kaldi, model)`` tuple: the interface and the model object.
    """
    base_path = Path(tmpdir_factory.mktemp("pipeline"))
    state_dir = f'{base_path}/state'

    # Check the very path handed to KaldiInterface. Joining with the absolute
    # segment '/state' would discard base_path and test the filesystem root.
    if not Path(state_dir).exists():
        kaldi = KaldiInterface(state_dir)

        dataset = kaldi.new_dataset('dataset_x')
        dataset.add_directory('/recordings/transcribed')
        dataset.select_importer('Elan')
        dataset.process()

        pron_dict = kaldi.new_pron_dict('pron_dict_y')
        pron_dict.link(dataset)
        pron_dict.set_l2s_path('/recordings/letter_to_sound.txt')
        pron_dict.generate_lexicon()

        model = kaldi.new_model('model_z')
        model.link(dataset, pron_dict)
        model.build_kaldi_structure()  # TODO: remove this line
        model.train()  # may take a while
    else:
        kaldi = KaldiInterface.load(state_dir)
        model = kaldi.new_model('model_z', use_existing=True)

    return (kaldi, model)
def create_app(test_config=None):
    """Create and configure the Flask application.

    Called by the ``flask run`` command in the CLI. Sets up the static
    resources, stores the Kaldi interface and the "current" selections in
    ``app.config``, registers the endpoints blueprint, and adds the routes
    that serve the single-file React app.

    Args:
        test_config: Currently unused by this function.

    Returns:
        The configured Flask application instance.
    """
    # Auto-detect yarn watch vs. yarn build: a 'js' entry in the public
    # directory means yarn watch is in use. (An earlier, disabled approach
    # keyed this on the FLASK_ENV environment variable instead.)
    static_dir_watch = '/js'
    static_dir_build = '/static'
    if 'js' in os.listdir(GUI_PUBLIC_DIR):
        static_dir = static_dir_watch
    else:
        static_dir = static_dir_build
    print('using static_dir:', static_dir)

    # Create a custom Flask instance defined in the app.py file. Same as a
    # normal Flask class but with a specialised blueprint function.
    app = Flask(
        __name__,
        instance_relative_config=True,
        static_folder=GUI_PUBLIC_DIR + static_dir,
        static_url_path=static_dir,
    )

    # When making this multi-user, the secret key would need to be a secure
    # hash.
    app.config.from_mapping(SECRET_KEY='dev')

    # For a single user, storing the Kaldi interface object in app.config is
    # okay; this would need to change for multi-user, where each user would
    # require a unique KaldiInterface. One KaldiInterface stores all the
    # artifacts that user has generated.
    interface_path = Path('/elpis/state')
    if interface_path.exists():
        app.config['INTERFACE'] = KaldiInterface.load(interface_path)
    else:
        app.config['INTERFACE'] = KaldiInterface(interface_path)
    app.config['CURRENT_DATASET'] = None  # not okay for multi-user
    app.config['CURRENT_PRON_DICT'] = None  # not okay for multi-user
    app.config['CURRENT_MODEL'] = None  # not okay for multi-user

    # Add the endpoint routes.
    app.register_blueprint(endpoints.bp)
    print(app.url_map)

    # The rest of the routes below are for the single-file React app.
    @app.route('/index.html')
    def index_file():
        """Redirects to '/' for React."""
        return redirect('/')

    @app.route('/', defaults={'path': ''})
    @app.route("/<path:path>")
    def index(path):
        """Return the contents of index.html for any path."""
        print('in index with:', path)
        with open(f"{GUI_PUBLIC_DIR}/index.html", "r") as fin:
            content = fin.read()
        return content

    @app.route('/favicon.ico')
    def favicon():
        """Return the favicon bytes with an icon content type."""
        with open(f"{GUI_PUBLIC_DIR}/favicon.ico", "rb") as fin:
            icon_bytes = fin.read()
        # Returning bare bytes would be sent with the default text/html
        # mimetype, so build the response explicitly.
        return app.response_class(
            icon_bytes, mimetype='image/vnd.microsoft.icon')

    return app
# Example code for using elpis from Python.
# Must be run from the docker container.
# NOTE(review): the original note said the objects ('dsy', 'mx', 'tx') must
# not already exist in the interface directory, and to make the Kaldi
# interface in a new location if they do. The calls below use `load`,
# `get_dataset` and `get_model`, which presumably fetch existing objects;
# confirm which is intended.

from elpis.wrappers.objects.interface import KaldiInterface

# Step 0
# ======
# Load the Kaldi interface directory (where all the associated files/objects
# are stored).
kaldi = KaldiInterface.load('/elpis/state')

# Step 1
# ======
# Get the dataset to train on, add one transcription/audio pair, and process
# it. Both files are opened before either is added.
ds = kaldi.get_dataset('dsy')
with open('/elpis/abui_toy_corpus/data/1_1_4.eaf', 'rb') as feaf:
    with open('/elpis/abui_toy_corpus/data/1_1_4.wav', 'rb') as fwav:
        ds.add_fp(feaf, 'f.eaf')
        ds.add_fp(fwav, 'f.wav')
ds.process()

# Step 2
# ======
# Link the dataset to the model, generate the lexicon from the
# letter-to-sound file, then train the model.
m = kaldi.get_model('mx')
m.link(ds)
m.set_l2s_path('/elpis/abui_toy_corpus/config/letter_to_sound.txt')
m.generate_lexicon()
m.train()  # may take a while