Example 1
class HPCToolkitProfiler(Profiler):
    def __init__(self, profiler_settings, verbose=False):
        super().__init__(profiler_settings, verbose)

        self.hpcrun_cmd = Command('hpcrun')
        self.hpcrun_cmd = self.hpcrun_cmd.bake('-e', 'WALLCLOCK@5000')
        self.hpcstruct_cmd = Command('hpcstruct')
        self.hpcprof_cmd = Command('mpirun')
        self.hpcprof_cmd = self.hpcprof_cmd.bake('-np', '1', 'hpcprof-mpi',
                                                 '--metric-db', 'yes')

    def profile(self, repo):
        profiles = {}
        for test in repo.itertests():
            vprint(self.verbose,
                   'Profiling test \'{}\'...'.format(test['name']))
            exec_path = os.path.join(test['prefix'], test['executable'])

            hpcstruct_name = '{}.hpcstruct'.format(test['name'])
            hpcmeasurements_name = 'hpctoolkit-{}-measurements'.format(
                test['name'])
            hpcdatabase_name = 'hpctoolkit-{}-database'.format(test['name'])

            # try to generate hpcstruct
            try:
                self.hpcstruct_cmd(exec_path, '--output', hpcstruct_name)
            except:
                vprint(self.verbose, 'Failed to create hpcstruct file...')
                continue

            # run test
            try:
                self.hpcrun_cmd('--output', hpcmeasurements_name, exec_path,
                                test['args'])
            except:
                vprint(self.verbose,
                       'Running test \'{}\' failed...'.format(test['name']))
                continue

            # generate profile from the measurements collected above
            try:
                self.hpcprof_cmd('-S', hpcstruct_name, '-I', './+', '--output',
                                 hpcdatabase_name, hpcmeasurements_name)
            except:
                vprint(self.verbose,
                       'Generating profile for test \'{}\' failed...'.format(
                           test['name']))
                continue

            # finally read hatchet profile
            profiles[test['name']] = ht.GraphFrame.from_hpctoolkit(
                hpcdatabase_name)

            # and now delete the leftover files/folders
            rm('-r', hpcstruct_name, hpcmeasurements_name, hpcdatabase_name)

        return profiles
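
Setting the HPCToolkit specifics aside, the sh idiom this example relies on is Command plus bake: bake() returns a new Command with arguments pre-applied, and calling the baked object appends the remaining arguments. A minimal sketch of that pattern, using ls as a stand-in for hpcrun so it runs anywhere:

from sh import Command

ls = Command('ls')            # resolve the executable once
ls_long = ls.bake('-l')       # pre-apply default arguments; returns a new Command
print(ls_long('/tmp'))        # equivalent to running: ls -l /tmp
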
Example 2
def exe(ctx, context, target_url, pending, success, failure, error, command):
    def _status_set(state, description):
        log.info('%s -> %s', context, state)
        ctx.invoke(status_set,
                   context=context,
                   target_url=target_url,
                   state=state,
                   description=description)

    try:
        _status_set('pending', pending)
        cmd = Command(command[0])
        cmd = cmd.bake(command[1:]) if len(command) > 1 else cmd
        cmd(_fg=True)
        _status_set('success', success)
        exit(0)
    except ErrorReturnCode as e:
        log.error('Command failed with exit code: %d', e.exit_code)
        _status_set('failure', failure)
    except CommandNotFound as e:
        log.error('Command not found: %s', e)
        _status_set('error', error)
    except Exception as e:
        log.error(e, exc_info=True)
        _status_set('error', error)

    exit(1)
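
A stripped-down sketch of the same control flow, assuming only that a `false` executable is on PATH: _fg=True runs the child in the foreground, ErrorReturnCode signals a non-zero exit, and CommandNotFound is raised when the executable cannot be resolved:

import sys
from sh import Command, ErrorReturnCode, CommandNotFound

try:
    cmd = Command('false')    # stand-in for the user-supplied command
    cmd(_fg=True)             # run in the foreground; raises on non-zero exit
except ErrorReturnCode as e:
    print('command failed with exit code:', e.exit_code, file=sys.stderr)
except CommandNotFound as e:
    print('command not found:', e, file=sys.stderr)
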
Example 3
    def get_command(self):
        """ Returns a reusable sh.Command object that can execute multiple different SFTP commands.
        """
        # A list of arguments that will be added to the base command
        args = []

        # Buffer size is always available
        args.append('-B')
        args.append(self.buffer_size)

        # Bandwidth limit is always available
        args.append('-l')
        args.append(self.bandwidth_limit)

        # Preserving file and directory metadata is optional
        if self.should_preserve_meta:
            args.append('-p')

        # Immediate flushing is optional
        if self.should_flush:
            args.append('-f')

        # Compression is optional
        if self.is_compression_enabled:
            args.append('-C')

        # Forcing a particular IP version is optional
        if self.force_ip_type:
            args.append(ip_type_map[self.force_ip_type])

        # Port is optional
        if self.port:
            args.append('-P')
            args.append(self.port)

        # Identity file is optional
        if self.identity_file:
            args.append('-i')
            args.append(self.identity_file)

        # SSH config file is optional
        if self.ssh_config_file:
            args.append('-F')
            args.append(self.ssh_config_file)

        # Base command to build additional arguments into
        command = Command(self.sftp_command)
        command = command.bake(*args)

        return command
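
The method collects plain string options in a list and splats them into bake() once at the end. The same pattern in isolation, with grep as a stand-in for the configured SFTP command:

from sh import Command

# Collect options conditionally, then bake them once into a reusable command.
args = ['-n']                  # always: show line numbers
case_insensitive = True        # stand-in for an optional setting
if case_insensitive:
    args.append('-i')

grep = Command('grep').bake(*args)
# Later calls only need the per-invocation arguments, e.g.:
# grep('pattern', 'some_file')
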
Example 4
    def ansible(self, cmd='ansible-playbook'):
        tmp_inventory_dir = os.path.join(data_dir(), 'vagrant-inventory')
        if not os.path.isdir(tmp_inventory_dir):
            os.makedirs(tmp_inventory_dir)

        # create a temporary inventory file for ansible
        tmp_inventory_file = os.path.join(tmp_inventory_dir, self.vm_name())
        with open(tmp_inventory_file, 'w') as f:
            f.write('%s ansible_ssh_host=%s ansible_ssh_port=22 '
                    'ansible_ssh_private_key_file=%s' % (
                        self.vm_name(),
                        self.ip(),
                        self.ssh_key()
                    ))

        ansible = Command(cmd)
        new_env = ansible_env(os.environ.copy())

        return ansible.bake('-i', tmp_inventory_file,
                            '--extra-vars', '@%s' %
                            self.project.config_file(),
                            _env=new_env,
                            _out_bufsize=0,
                            _err_bufsize=0)
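
bake() also accepts sh's special keyword arguments, so the environment and buffering settings travel with the returned command. A minimal sketch, with date standing in for ansible-playbook and a made-up environment variable:

import os
from sh import Command

env = os.environ.copy()
env['EXAMPLE_FLAG'] = '1'          # hypothetical extra variable

cmd = Command('date')              # stand-in for ansible-playbook
cmd = cmd.bake(_env=env,           # run with the customised environment
               _out_bufsize=0,     # unbuffered stdout...
               _err_bufsize=0)     # ...and stderr, useful for live output
print(cmd())
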
Example 5
class Snapshot:
    "Take scylla snapshot and upload it using provided uploader"

    def __init__(self,
                 scylla_data_dir,
                 db_path,
                 storage_obj,
                 nodetool_path='/usr/bin/nodetool',
                 cqlsh_path='/usr/bin/cqlsh',
                 cqlsh_host='127.0.0.1',
                 cqlsh_port='9042',
                 prefix='scyllabackup',
                 max_workers=4):
        self.scylla_data_dir = scylla_data_dir
        self.db = DB(db_path)
        self.db_path = db_path
        self.nodetool = Command(nodetool_path)
        self.cqlsh = Command(cqlsh_path).bake(cqlsh_host, cqlsh_port)
        self._upload_queue = gevent.queue.JoinableQueue()
        self._download_queue = gevent.queue.JoinableQueue()
        self._delete_queue = gevent.queue.JoinableQueue()
        self._verify_queue = gevent.queue.JoinableQueue()
        self._storage = storage_obj
        self._prefix = prefix
        self.db_key = self._prefix + '/' + os.path.basename(self.db_path)
        self.max_workers = max_workers

    @staticmethod
    def mkdir_p(path):
        """Function to handle recursive directory creation like `mkdir -p`.
        It does not fail if a directory already exists at the path. If the
        path points to some other file type, the exception is re-raised.

        :param path: Path where directory needs to be created
        :returns: Nothing
        :rtype: None

        """
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise
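
    # Note: on Python 3.2+ the same behaviour is available via
    # os.makedirs(path, exist_ok=True); an existing non-directory at `path`
    # still raises, matching this helper.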

    def snapshot_file_glob(self, snapshot_name, keyspace_name='*'):
        """Function to return a glob iterator for given snapshot name.
        Restrict output to a specific keyspace if specified

        :param snapshot_name: Name of the snapshot whose files you want a
                              globbing iterator for.
        :param keyspace_name: Name of the keyspace within the specified
                              snapshot to which the globbing should be
                              restricted.
        :returns: A generator for looping over files in a snapshot
        :rtype: Iterator[str]

        """
        logger.debug("Gathering snapshot files from "
                     "data dir {0}".format(self.scylla_data_dir))
        snapshot_path = os.path.join(self.scylla_data_dir, keyspace_name, '*',
                                     'snapshots', snapshot_name, '*')

        return glob.iglob(snapshot_path)

    def _nodetool_snapshot_op(self,
                              snapshot_name,
                              keyspace_name=None,
                              op='snapshot'):
        """Wrap nodetool utility for strictly taking or deleting snapshots.
        The function takes a snapshot name and optionally a keyspace name.
        Taking/deleting is controlled by the `op` parameter.

        :param snapshot_name: Tag name of the snapshot to be taken or deleted
        :param keyspace_name: Restrict snapshot operation to a keyspace if
                              specified
        :param op: Snapshot operation keyword. Valid values are 'snapshot' for
                   taking a snapshot or 'clearsnapshot' for deleting snapshot
        :returns: Nothing
        :rtype: None

        """
        snapshot_log = "snapshot {0}".format(snapshot_name)
        if keyspace_name:
            snapshot_log += " for keyspace {0}".format(keyspace_name)

        if op == 'snapshot':
            debug_message = "Taking " + snapshot_log
            error_message = "Failed while taking " + snapshot_log
        elif op == 'clearsnapshot':
            debug_message = "Deleting " + snapshot_log
            error_message = "Failed while deleting " + snapshot_log
        else:
            raise ValueError("Snapshot operation can be "
                             "one of snapshot or clearsnapshot")

        logger.debug(debug_message)

        try:

            cmd = self.nodetool.bake(op, '-t', snapshot_name)
            if keyspace_name:
                cmd = cmd.bake(keyspace_name)
            cmd()

        except ErrorReturnCode as e:
            logger.error(error_message)
            log_shell_exception_and_exit(e)

    def nodetool_take_snapshot(self, snapshot_name, keyspace_name=None):
        """Take snapshot with specified snapshot name tag, and optionally
        restrict the operation to keyspace name if provided

        :param snapshot_name: Tag name of the snapshot to be taken
        :param keyspace_name: Restrict snapshot operation to a keyspace if
                              specified
        :returns: Nothing
        :rtype: None

        """
        self._nodetool_snapshot_op(snapshot_name, keyspace_name, op='snapshot')

    def snapshot_schema(self, snapshot_name):
        logger.debug("Trying to take snapshot of schema")
        try:
            schema = self.cqlsh('-e', 'DESC SCHEMA;')
            self.db.add_snapshot(snapshot_name, schema.stdout)
        except ErrorReturnCode as e:
            logger.error(
                "Failed to take schema backup for snapshot {0}".format(
                    snapshot_name))
            log_shell_exception_and_exit(e)

    def nodetool_delete_snapshot(self, snapshot_name, keyspace_name=None):
        """Delete snapshot with specified snapshot name tag, and optionally
        restrict the operation to keyspace name if provided

        :param snapshot_name: Tag name of the snapshot to be deleted
        :param keyspace_name: Restrict snapshot operation to a keyspace if
                              specified
        :returns: Nothing
        :rtype: None

        """
        self._nodetool_snapshot_op(snapshot_name,
                                   keyspace_name,
                                   op='clearsnapshot')

    def upload_snapshot(self, snapshot_name, keyspace_name='*'):
        """Take and upload a scylladb snapshot to cloud storage

        :param snapshot_name: Tag name of the snapshot to be taken
        :param keyspace_name: Restrict snapshot operation to a keyspace if
                              specified
        :returns: Nothing
        :rtype: None

        """
        self.snapshot_schema(snapshot_name)

        # The path of a scylla db snapshot file has the following layout; we
        # need its last five components for the backup. Example path:
        # <scylla_data_dir>/<keyspace>/<table>/snapshots/<snapshot>/<file>.db
        def split_path(path):
            for i in range(5):
                path, basename = os.path.split(path)
                yield basename

        for i in range(self.max_workers):
            gevent.spawn(self.file_upload_worker)

        file_list = []
        for file_name in self.snapshot_file_glob(snapshot_name, keyspace_name):
            # Skip uploading manifest.json files, which contain the list of
            # backup files for a snapshot. We do not need them, as the same
            # info is present in the database backup.
            if not fnmatch.fnmatch(file_name, '*/manifest.json'):
                (file_base_name, _, _, table_name,
                 keyspace_name) = split_path(file_name)
                logger.info("Adding file {0}, file: {1}, "
                            "table: {2}, keyspace: {3} "
                            "in upload_queue".format(file_name, file_base_name,
                                                     table_name,
                                                     keyspace_name))

                file_list.append((keyspace_name, table_name, file_base_name))
                self._upload_queue.put(
                    (file_name, file_base_name, keyspace_name, table_name))

        logger.info("Add file list in database for "
                    "snapshot {0}".format(snapshot_name))
        self.db.add_snapshot_files(snapshot_name, file_list)
        self._storage.upload_file(self.db_key, self.db_path)

        self._upload_queue.join()

    def download_db(self, path):
        """Download scyllabackup metadata db file to specified path

        :param path: File path to download scyllabackup metadata db
        :returns: Nothing
        :rtype: None

        """
        self._storage.download_file(self.db_key, path)

    def download_snapshot(self, path, snapshot_name, keyspace_name=None):
        """Download snapshot from cloud storage reading the scyllabackup
        metadata db

        :param path: Directory to download a scyllabackup snapshot
        :param snapshot_name: Tag name of the snapshot to be downloaded
        :param keyspace_name: Restrict download operation to a keyspace if
                              specified
        :returns: Nothing
        :rtype: None

        """
        snapshot_id = self.db.find_snapshot_id(snapshot_name)
        if snapshot_id is None:
            logger.error(
                "Specified snapshot doesn't exist, please specify a valid snapshot."
            )
            sys.exit(2)

        for file_tuple in self.db.find_snapshot_files(snapshot_name,
                                                      keyspace_name):
            # file_tuple = tuple(keyspace,tablename,file)
            self._download_queue.put(file_tuple)
        for i in range(self.max_workers):
            gevent.spawn(self.file_download_worker, path)
        self._download_queue.join()

    def file_download_worker(self, path):
        """Worker for downloading snapshot files. This worker is mapped to
        gevent threads for concurrency. Number of threads is configured by
        `self.max_workers`

        :param path: Directory path where snapshot files will be downloaded
        :returns: Nothing
        :rtype: None

        """
        while True:
            try:
                # file_tuple = tuple(keyspace,tablename,file)
                file_tuple = self._download_queue.get()
                storage_key = '/'.join((self._prefix, ) + file_tuple)
                Snapshot.mkdir_p(os.path.join(path, *file_tuple[:-1]))
                self._storage.download_file(storage_key,
                                            os.path.join(path, *file_tuple))

            except Exception as e:
                logger.exception("Unexpected exception encountered")
                sys.exit(4)
            finally:
                self._download_queue.task_done()

    def verify_snapshot(self, snapshot_name):
        """Verifies that all files for a given snapshot name are present in
        the cloud storage. Useful for a consistency check before downloading
        snapshot

        :param snapshot_name: Tag name of the snapshot to be verified
        :returns: True if all files for given snapshot are present in cloud
                  storage, else False
        :rtype: bool

        """
        self.verify_success = True

        for i in range(self.max_workers):
            gevent.spawn(self.file_verify_worker)

        for file_tuple in self.db.find_snapshot_files(snapshot_name):
            # file_tuple = tuple(keyspace,tablename,file)
            self._verify_queue.put(file_tuple)

        self._verify_queue.join()

        return self.verify_success

    def file_verify_worker(self):
        """Worker for verifying snapshot files. This worker is mapped to
        gevent threads for concurrency. Number of threads is configured by
        `self.max_workers`

        :returns: Nothing
        :rtype: None

        """
        while True:
            try:
                # file_tuple = tuple(keyspace,tablename,file)
                file_tuple = self._verify_queue.get()
                storage_key = '/'.join((self._prefix, ) + file_tuple)
                remote_file = self._storage.get_object_properties(storage_key)
                if remote_file is None:
                    logger.error("Remote file {0} "
                                 "doesn't exist".format(storage_key))
                    self.verify_success = False
                else:
                    logger.debug("Remote file {0} "
                                 "is present in storage".format(storage_key))
            except Exception as e:
                logger.exception("Unexpected exception encountered")
                sys.exit(4)
            finally:
                self._verify_queue.task_done()

    def file_upload_worker(self):
        """Worker for uploading files. This worker is mapped to gevent threads
        for concurrency. Number of threads is configured by `self.max_workers`

        :returns: Nothing
        :rtype: None

        """

        while True:
            try:
                (file_name, file_base_name, keyspace_name,
                 table_name) = self._upload_queue.get()

                key = '/'.join(
                    (self._prefix, keyspace_name, table_name, file_base_name))

                remote_file = (self._storage.get_object_properties(
                    key, metadata=True))

                file_stat = os.stat(file_name)
                file_size = file_stat.st_size
                file_mtime = str(int(file_stat.st_mtime))
                if (remote_file and remote_file['size'] == file_size
                        and remote_file['metadata']['mtime'] == file_mtime):
                    logger.info('Remote file size/mtime matches for "{0}".'
                                ' No reupload required'.format(key))
                else:
                    if remote_file:
                        logger.warning('Remote file size/mtime mismatch for '
                                       '"{0}". Reupload required'.format(key))

                    logger.info('Uploading file "{0}"'.format(key))
                    self._storage.upload_file(key,
                                              file_name,
                                              metadata={'mtime': file_mtime})
            except Exception as e:
                logger.exception("Unexpected exception encountered")
                sys.exit(4)
            finally:
                self._upload_queue.task_done()

    def delete_snapshot(self, snapshot):
        """Delete all files older than a given snapshot from cloud storage and
        cleanup scyllabackup metadata db

        :param snapshot_name: Tag name of the snapshot before which all files
                              are to be deleted
        :returns: Nothing
        :rtype: None

        """
        for file_tuple in self.db.find_deletable_files(snapshot):
            self._delete_queue.put(file_tuple)

        for i in range(self.max_workers):
            gevent.spawn(self.file_delete_worker)
        self._delete_queue.join()
        self.db.delete_snapshots_files_older_than(snapshot)
        if self.db.cleanup_files_db():
            self.db.vacuum()

    def file_delete_worker(self):
        """Worker for deleting files. This worker is mapped to gevent threads
        for concurrency. Number of threads is configured by `self.max_workers`

        :returns: Nothing
        :rtype: None

        """

        while True:
            try:
                # file_tuple = tuple(keyspace,tablename,file)
                file_tuple = self._delete_queue.get()
                storage_key = '/'.join((self._prefix, ) + file_tuple)
                self._storage.delete_key(storage_key)
            except AzureMissingResourceHttpError as e:
                logger.error(
                    "Deletion of blob {0} failed. It's already deleted or missing."
                    .format(storage_key))
            except Exception as e:
                logger.exception("Unexpected exception encountered")
                sys.exit(4)
            finally:
                self._delete_queue.task_done()

    def find_new_table_path(self, keyspace_name, table_name):
        """This function returns the on-disk directory of a table where
        sstables are stored for scylladb given the keyspace and table name.

        This utility is required for creating the restore mapping of table
        directories between a downloaded snapshot and a freshly created
        scylladb cluster for restore. The mapping is returned as a dictionary.

        The mapping is required because scylladb generates a UUID for each
        table in a keyspace and stores sstable files in a dir named `tablename
        + '-' + <UUID without any hyphens>`. This UUID is freshly generated
        when a keyspace is created in scylla. When restoring a snapshot, the
        UUID in the table path of a newly created scylladb instance (where the
        schema is restored) will mismatch the table path of the downloaded
        snapshot, because the snapshot was created on a different cluster with
        a different UUID for each table. By creating a mapping, we can
        automate the restore process.


        :param keyspace_name: Name of the keyspace you want restore mapping for
        :param table_name: Name of the table in keyspace you want restore
                           mapping for
        :returns: Path of directory where sstables are stored for table in
                  given keyspace
        :rtype: str

        """
        cql = ("EXPAND ON; "
               "SELECT id FROM system_schema.tables "
               "WHERE keyspace_name = '{0}' "
               "AND table_name= '{1}';").format(keyspace_name, table_name)
        cql_cmd = self.cqlsh.bake('--no-color', '-e', cql)
        # Sample output of above command (ignore indentation, includes blank lines)
        # """
        # Now Expanded output is enabled
        #
        # @ Row 1
        # ----+--------------------------------------
        #  id | 08ae880a-52e9-43ec-9ed1-55afc2e8e7c6
        #
        # (1 rows)
        # """
        uuid_lines = [
            line for line in cql_cmd().splitlines()
            if line.startswith(' id | ')
        ]
        if len(uuid_lines) != 1:
            raise ValueError('Expected exactly one matching id for the given '
                             'keyspace and table')
        uuid = uuid_lines[0].split()[-1].replace('-', '')
        table_path_name = "{0}-{1}".format(table_name, uuid)
        return os.path.join(self.scylla_data_dir, keyspace_name,
                            table_path_name)

    def restore_schema(self, restore_schema_path):
        """Function to restore schema in scylladb from a cql file. This can be
        done manually also directly via cqlsh. This just abstracts the
        interface and is only expected to run on a new/clean cluster.

        :param restore_schema_path: The path of the cql file to be imported in
                                    scylladb
        :returns: Nothing
        :rtype: None

        """
        try:
            self.cqlsh.bake('-f')(restore_schema_path)
        except ErrorReturnCode as e:
            logger.error("Error while restoring schema")
            log_shell_exception_and_exit(e)

    def restore_snapshot_mapping(self, restore_path, keyspace_name):
        """Returns a dictionary which represents a path mapping from an already
        downloaded snapshot tables to the freshly created tables for a given
        keyspace. This is required as the directory path of table in snapshot
        mismatches directory path of table in a keyspace for a newly created
        cluster. Refer documentation of `find_new_table_path` function for more
        details why this happens.

        :param restore_path: The directory path where the snapshot files have
                             been downloaded
        :param keyspace_name: Name of the keyspace to be restored, from
                              download path
        :returns: Dictionary with path of downloaded snapshot table dir as key
                  and path of new table dir as value
        :rtype: dict[str, str]

        """
        tables = (os.path.basename(table_path) for table_path in glob.iglob(
            os.path.join(restore_path, keyspace_name, '*'))
                  if os.path.isdir(table_path))

        restore_mapping = {}
        for table in tables:
            old_table_path = os.path.join(restore_path, keyspace_name, table)

            # NOTE: The following call strips the last 33 chars from the table
            # dir name, i.e. the '-<UUID without hyphens>' suffix, leaving the
            # bare table name that `find_new_table_path` expects. Example: if
            # scylla has generated uuid "08ae880a-52e9-43ec-9ed1-55afc2e8e7c6"
            # for a table 'table1' in keyspace 'keyspace1', it will be stored
            # on disk in directory:
            # <scylla_data_dir>/keyspace1/table1-08ae880a52e943ec9ed155afc2e8e7c6.
            # Notice that while appending the uuid to the table name for the
            # directory, scylladb removes all the hyphens. The same uuid suffix
            # is present in the table directory downloaded from cloud storage.
            new_table_path = self.find_new_table_path(keyspace_name,
                                                      table[:-33])

            restore_mapping[old_table_path] = new_table_path

        return restore_mapping

    def restore_snapshot(self, restore_path, restore_mapping):
        """This function takes the mapping generated via
        `restore_snapshot_mapping` function for table mapping from snapshot to
        table data directory for scylladb. It then moves all the sstable files
        for each table from snapshot download dir to scylladb data dir.

        :param restore_path: The directory path where the snapshot files have
                             been downloaded
        :param restore_mapping: Dictionary with path of downloaded snapshot
                                table dir as key and path of new table dir as
                                value
        :returns: Nothing
        :rtype: None

        """
        # Ensure that target restore dirs do not have any existing files
        target_dir_empty = True
        for new_table_path in restore_mapping.values():
            files_in_new_table_path = [
                f for f in os.listdir(new_table_path)
                if os.path.isfile(os.path.join(new_table_path, f))
            ]
            if files_in_new_table_path:
                target_dir_empty = False
                table_name = os.path.basename(new_table_path)[:-33]
                logger.error("Newly created table {0} has some existing files "
                             "in {1}".format(table_name, new_table_path))

        if not target_dir_empty:
            sys.exit(2)

        nodetool_status = self.nodetool('status', _ok_code=[0, 1])
        if nodetool_status.exit_code == 0:
            logger.error('Nodetool status command was successful. Scylladb is '
                         'still running, it must be stopped before restoring!')
            sys.exit(3)

        for old_table_path, new_table_path in restore_mapping.items():
            for file_path in glob.iglob(os.path.join(old_table_path, '*')):
                move(file_path, new_table_path)
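
One sh detail worth calling out from the class above: bake() does not mutate the command it is called on, it returns a new Command, so the result has to be reassigned (as _nodetool_snapshot_op does). A runnable sketch with echo standing in for nodetool:

from sh import Command

nodetool = Command('echo')                       # stand-in for /usr/bin/nodetool
cmd = nodetool.bake('snapshot', '-t', 'tag1')    # bake returns a new Command
cmd = cmd.bake('my_keyspace')                    # ...so keep the result
print(cmd())                                     # prints: snapshot -t tag1 my_keyspace
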
Example 6
    def processBatch(self, recordings):
        """
        The main processing function of this module. This function
        is called to do processing on a batch of recordings from the session.

        Parameters:
            recordings  a batch of recordings from the session; each entry is
                        a dict with at least 'recId', 'recPath', 'token',
                        'tokenId' and 'valid' keys
        Return:
            qcReport, a list with one stats entry per recording, or raise an
            exception if something is wrong (and this should not be called
            again.)
        """

        computeMfccFeats = Command('{}/src/featbin/compute-mfcc-feats'.format(
            self.common.kaldiRoot))
        computeMfccFeats = computeMfccFeats.bake(
            '--sample-frequency={}'.format(self.common.sampleFreq),
            '--use-energy=false', '--snip-edges=false')
        gmmLatgenFaster = Command('{}/src/gmmbin/gmm-latgen-faster'.format(
            self.common.kaldiRoot))
        latticeBestPath = Command('{}/src/latbin/lattice-best-path'.format(
            self.common.kaldiRoot))

        with tempfile.TemporaryDirectory(prefix='qc') as tmpdir:
            tokensGraphsScpPath = join(tmpdir, 'graphs.scp')
            mfccFeatsScpPath = join(tmpdir, 'feats.scp')
            mfccFeatsPath = join(tmpdir, 'feats.ark')
            tokensPath = join(tmpdir, 'tokens')
            with open(tokensPath, 'w') as tokensF, \
                 open(mfccFeatsScpPath, 'w') as mfccFeatsTmp, \
                 open(tokensGraphsScpPath, 'w') as tokensGraphsScp:

                graphsScp = []
                for r in recordings:
                    if self.common.downsample:
                        print(
                            '{rec_id} sox {rec_path} -r{sample_freq} -t wav - |'
                            .format(rec_id=r['recId'],
                                    rec_path=r['recPath'],
                                    sample_freq=self.common.sampleFreq),
                            file=mfccFeatsTmp)
                    else:
                        print('{} {}'.format(r['recId'], r['recPath']),
                              file=mfccFeatsTmp)

                    tokenInts = self.common.symToInt(r['token'])

                    print('{} {}'.format(r['recId'], tokenInts), file=tokensF)
                    try:
                        graphsScp.append('{} {}'.format(
                            r['recId'],
                            self.decodedScpRefs[str(r['tokenId'])]))
                    except KeyError as e:
                        print(
                            'Error, probably could not find key in MarosijoModule/local/graphs.scp, id: {}, prompt: {}'
                            .format(r['tokenId'], r['token']))
                        raise

                # make sure .scp file is sorted on keys
                graphsScp = sorted(graphsScp, key=lambda x: x.split()[0])
                for line in graphsScp:
                    print(line, file=tokensGraphsScp)

            try:
                # We save the features on disk (the ,p means permissive. Let kaldi ignore errors,
                # and handle missing recordings later)
                computeMfccFeats('scp,p:{}'.format(mfccFeatsScpPath),
                                 'ark:{}'.format(mfccFeatsPath))

                computeCmvnCmd = (
                    '{kaldi_root}/src/featbin/compute-cmvn-stats ' +
                    'ark,p:{mfcc_feats_path} ' + 'ark:- ').format(
                        mfcc_feats_path=mfccFeatsPath,
                        kaldi_root=self.common.kaldiRoot)

                featsCmd = (
                    '{kaldi_root}/src/featbin/apply-cmvn ' +
                    '"ark,p:{compute_cmvn_cmd} |" ' +
                    'ark:{mfcc_feats_path} ' +
                    '"ark:| {kaldi_root}/src/featbin/add-deltas ark,p:- ark:-" '
                ).format(compute_cmvn_cmd=computeCmvnCmd,
                         mfcc_feats_path=mfccFeatsPath,
                         kaldi_root=self.common.kaldiRoot)

                # create a pipe using sh, output of gmm_latgen_faster piped into lattice_oracle
                # piping in contents of tokens_graphs_scp_path and writing to edits_path
                # note: be careful, as of date sh seems to swallow exceptions in the inner pipe
                #   https://github.com/amoffat/sh/issues/309

                hypLines = latticeBestPath(
                    gmmLatgenFaster(
                        '--acoustic-scale=0.06',  # DEM: was 0.1; changing this value fixed the issue of phonemes ending up in the transcription
                        '--beam=12',
                        '--max-active=1000',
                        '--lattice-beam=10.0',
                        '--max-mem=50000000',
                        self.common.acousticModelPath,
                        f'scp,p:{tokensGraphsScpPath}',  # fsts-rspecifier
                        f'ark,p:{featsCmd} |',  # features-rspecifier
                        'ark:-',  # lattice-wspecifier
                        _err=sys.stderr,
                        _piped=True),
                    '--acoustic-scale=0.06',
                    f"--word-symbol-table={self.common.symbolTablePath}",
                    'ark,p:-',
                    'ark,t:-',
                    _iter=True,
                    _err=sys.stderr)

            except ErrorReturnCode_1 as e:
                # No data (e.g. all wavs unreadable)
                hypLines = []
                print('e.stderr: ', e.stderr)

            def splitAlsoEmpty(s):
                cols = s.split(maxsplit=1)
                if len(cols) == 1:
                    return cols[0], ''
                elif len(cols) == 2:
                    return cols[0], cols[1]
                else:
                    raise ValueError('Unexpected')

            hyps = {
                str(recId): tok_
                for recId, tok_ in (splitAlsoEmpty(line.strip())
                                    for line in hypLines)
            }

            refs = {
                str(recId): tok_
                for recId, tok_ in ((r['recId'],
                                     self.common.symToInt(r['token']))
                                    for r in recordings)
            }

            #for r in recordings:
            #    print(r, self.common.symToInt(r['token']))

            details = {
                hypKey: MarosijoAnalyzer(hypTok.split(), refs[hypKey].split(),
                                         self.common).details()
                for hypKey, hypTok in hyps.items()
            }

            # 'empty' analysis in case Kaldi couldn't analyse recording for some reason
            # look at MarosijoAnalyzer.details() for format
            placeholderDetails = {
                'hybrid': 0.0,
                'phone_acc': 0.0,
                'wer': 0.0,
                'onlyInsOrSub': False,
                'correct': 0,
                'sub': 0,
                'ins': 0,
                'del': 0,
                'startdel': 0,
                'enddel': 0,
                'extraInsertions': 0,
                'empty': False,
                'distance': 0
            }

            edits = {
                hypKey: details[hypKey]['distance']
                for hypKey, hypTok in hyps.items()
            }

            qcReport = []

            cumAccuracy = 0.0
            """
            We have an error in hypLines where gmmLatgenFaster outputs:
            Not producing output for utterance SESS0139_BLOCKG_08 since no final-state reached and --allow-partial=false
            This means that the key for that utterance isn't added to the dict hyps, which causes an error down
            the line. The fix is to remove those recordings from the list, and later figure out what to
            do with them.
            https://groups.google.com/g/kaldi-help/c/fTc3RP21tBY?pli=1
            
            """

            with open(join('log', 'tharErrorLog'), 'a') as f_out:
                # iterate over a copy so that removing items is safe
                for r in list(recordings):
                    if str(r['recId']) not in hyps.keys():
                        f_out.write(str(r) + '\n')
                        recordings.remove(r)

            for r in recordings:
                error = ''
                try:
                    old_wer = edits[str(r['recId'])] / len(r['token'].split())

                except KeyError as e:
                    # Kaldi must have choked on this recording for some reason
                    if isWavHeaderOnly(r['recPath']):
                        error = 'wav_header_only'
                        print(
                            'Error, only wav header in recording: {}; {}'
                            .format(r['recId'], repr(e)))
                    else:
                        # unknown error
                        print(e)
                        error = 'unknown_error'
                        print(
                            'Error, unknown error processing recording: {}; {}'
                            .format(r['recId'], repr(e)))

                try:
                    hyp = ' '.join([
                        self.common.symbolTableToInt[x]
                        for x in hyps[str(r['recId'])].split(' ')
                    ])  # hypothesis (words not ints)

                except KeyError as e:
                    if hyps[str(r['recId'])] == '':
                        hyp = ''
                    else:
                        if not error:
                            error = 'hyp_error'
                            print(
                                'Error, hypothesis error processing recording: {} for session {}'
                                .format(r['recId'], repr(e)))

                if not error:
                    old_wer_norm = 0.0 if 1 - old_wer < 0 else 1 - old_wer
                else:
                    old_wer_norm = 0.0
                    hyp = ''

                if not error:
                    analysis = details[str(r['recId'])]
                    analysis.update(error='no_error')
                else:
                    analysis = placeholderDetails
                    analysis.update(error=error)

                analysis.update(old_wer_norm=old_wer_norm)
                analysis.update(hyp=hyp)

                # handle specific errors
                if error == 'wav_header_only':
                    analysis.update(empty=True)

                # use phone accuracy (seemed to give best results)
                accuracy = analysis['phone_acc']

                stats = {"accuracy": accuracy}
                cumAccuracy += accuracy

                stats.update(analysis)
                qcReport.append({"recordingId": r['recId'],\
                                 "recPath": r['recPath'],\
                                 "sentence": r['token'], \
                                 "is valid?": r['valid'], \
                                 "stats": stats})

        return qcReport