Ejemplo n.º 1
0
    # Each stage bumps the step counter and only runs when the counter has
    # reached options.step, so earlier stages are skipped when a higher
    # start step is requested.
    current_step += 1
    if current_step >= options.step:
        logger.info("Start step: %d (%s)" %
                    (current_step, 'Rescoring lattices with lattice-tool'))
        # Remove the rescored-lattice directory left by a previous run, if any.
        if os.path.exists(baseline_lat_dir_rescored):
            shutil.rmtree(baseline_lat_dir_rescored)
        # The rescoring LM is passed with a '.gz' suffix appended.
        htk.lattice_rescore(current_step, baseline_lat_dir,
                            baseline_lat_dir_rescored, lm_rescore + '.gz',
                            lm_scale)

    current_step += 1
    if current_step >= options.step:
        logger.info("Start step: %d (%s)" %
                    (current_step, 'Decoding lattices with lattice-tool'))
        # Decodes the rescored lattices; the result goes to baseline_pass2_mlf.
        htk.lattice_decode(current_step, baseline_lat_dir_rescored,
                           baseline_pass2_mlf, lm_scale)

    # Not guarded by the step check: both MLFs are converted on every run of
    # this section (presumably MLF -> TRN transcripts, judging by the helper
    # name -- confirm in data_manipulation).
    data_manipulation.mlf_to_trn(baseline_pass1_mlf, pass1_trn,
                                 speaker_name_width)
    data_manipulation.mlf_to_trn(baseline_pass2_mlf, pass2_trn,
                                 speaker_name_width)

# Directory layout for the 'unsupsi' experiment; the lattice directories are
# defined unconditionally, whether or not the experiment is selected.
unsupsi_dir = 'unsup_si'
unsupsi_lat_dir = unsupsi_dir + '/lattices.htk'
unsupsi_lat_dir_rescored = unsupsi_dir + '/lattices.rescored'

# Only set up the experiment's MLF paths when 'unsupsi' was requested.
if 'unsupsi' in experiments:

    adapt_mlf = unsupsi_dir + '/adapt.mlf'
    pass1_mlf = unsupsi_dir + '/pass1.mlf'
    pass2_mlf = unsupsi_dir + '/pass2.mlf'
Ejemplo n.º 2
0
    def run(self):
        """Run this recognition experiment inside the ``self.name`` directory.

        The method copies the recognition SCP into the working directory,
        applies every entry of ``self.adaptations`` in order (each one
        receives the transform returned by the previous one), generates
        lattices with ``htk.HDecode`` and then:

        * if the model configuration has an ``lm_rescore`` value, rescores
          and decodes the lattices and converts ``rescore.mlf`` to
          ``recog.trn``;
        * otherwise rewrites ``hdecode.mlf`` into ``rescore.mlf`` (inserting a
          ``</s>`` line before each ``.`` terminator that lacks one and
          replacing the lattice directory prefix by ``*``) and converts
          ``hdecode.mlf`` to ``recog.trn``.

        Sets ``self.done`` to True on success and to False when ``Experiment``
        is raised inside the run. Returns None.
        """
        work_dir = self.name

        xforms_dir = work_dir + '/xforms'
        classes_dir = work_dir + '/classes'
        files_dir = work_dir + '/files'

        model = (self.model.configuration['model_dir'] + '/' +
                 self.configuration['model_name'])
        recog_scp = work_dir + "/recog.scp"
        # The experiment's own SCP replaces the model's one unless its value
        # starts with '_'.
        rscp = self.model.configuration['recognize_scp']
        if not self.configuration['recognize_scp'].startswith('_'):
            rscp = self.configuration['recognize_scp']
        # NOTE(review): this writes into work_dir before the directory loop
        # below has had a chance to create it -- presumably work_dir already
        # exists at this point; confirm against the caller.
        data_manipulation.copy_scp_file(rscp, recog_scp)
        dict_hdecode = self.model.configuration['dict_hdecode']
        tiedlist = self.model.configuration['tiedlist']
        lm = self.model.configuration['lm']
        lm_rescore = self.model.configuration['lm_rescore']
        lm_scale = self.configuration['lm_scale']
        beam = self.configuration['beam']
        end_beam = self.configuration['end_beam']
        max_pruning = self.configuration['max_pruning']
        num_tokens = self.configuration['num_tokens']
        hdecode_mlf = work_dir + '/hdecode.mlf'
        rescore_mlf = work_dir + '/rescore.mlf'
        recog_trn = work_dir + '/recog.trn'
        configs = [self.model.configuration['config']]
        # Read early so that a missing key fails before any work is done.
        speaker_name_width = self.model.configuration['speaker_name_width']

        try:
            htk_lat_dir = os.path.join(work_dir, 'lattices.htk')
            rescore_lat_dir = os.path.join(work_dir, 'lattices.rescore')
            log_dir = os.path.join(work_dir, 'log')
            # work_dir comes first so the sub-directories can be created.
            for directory in [work_dir, htk_lat_dir, rescore_lat_dir, log_dir,
                              xforms_dir, classes_dir, files_dir]:
                if not os.path.exists(directory):
                    os.mkdir(directory)

            # Chain the adaptations: each one gets the previous result.
            current_parent_transform = None
            for i, adaptation in enumerate(self.adaptations):
                current_parent_transform = adaptation.make_adaptation(
                    i, current_parent_transform, self)

            adap_dirs = None
            # Re-read the default after the adaptations ran, as the original
            # code did, in case one of them touched the model configuration.
            speaker_name_width = self.model.configuration['speaker_name_width']
            if current_parent_transform is not None:
                # The transform is indexed as
                # (extension, config, speaker_name_width).
                configs.append(current_parent_transform[1])
                extension = current_parent_transform[0]
                adap_dirs = [(xforms_dir, extension), (classes_dir, None)]
                speaker_name_width = current_parent_transform[2]

            # print(<single string>) behaves the same as the print statement
            # on Python 2 and also parses on Python 3.
            print("Start step: %d (%s)" %
                  (0, 'Generating lattices with HDecode'))
            htk.HDecode(log_dir, recog_scp, model, dict_hdecode, tiedlist, lm,
                        htk_lat_dir, num_tokens, hdecode_mlf, configs,
                        lm_scale, beam, end_beam, max_pruning, adap_dirs,
                        speaker_name_width)

            if lm_rescore is not None:
                print("Start step: %d (%s)" %
                      (0, 'Rescoring lattices with lattice-tool'))
                htk.lattice_rescore(log_dir, htk_lat_dir, rescore_lat_dir,
                                    lm_rescore + '.gz', lm_scale)

                print("Start step: %d (%s)" %
                      (0, 'Decoding lattices with lattice-tool'))
                htk.lattice_decode(log_dir, rescore_lat_dir, rescore_mlf,
                                   lm_scale)
                data_manipulation.mlf_to_trn(
                    rescore_mlf, recog_trn,
                    self.model.configuration['speaker_name_width'])
            else:
                # No rescoring LM: derive rescore.mlf from the HDecode output.
                # Both files are opened via ``with`` so the input handle is
                # closed as well (the original leaked it).
                with open(rescore_mlf, 'w') as out_file:
                    with open(hdecode_mlf) as in_file:
                        prev = ""
                        for line in in_file:
                            stripped = line.rstrip()
                            # Insert a '</s>' line before a '.' terminator
                            # whose preceding line is not already '</s>'.
                            if stripped == '.' and prev != '</s>':
                                out_file.write("</s>\n")
                            out_file.write(
                                stripped.replace(htk_lat_dir, '*') + "\n")
                            prev = stripped
                # NOTE(review): the transcript is built from the raw
                # hdecode_mlf, not from the rewritten rescore_mlf -- confirm
                # that this is intended.
                data_manipulation.mlf_to_trn(
                    hdecode_mlf, recog_trn,
                    self.model.configuration['speaker_name_width'])

            self.done = True
        except Experiment:
            # NOTE(review): only ``Experiment`` is caught here; any other
            # exception propagates and leaves self.done unchanged.
            self.done = False
Ejemplo n.º 3
0
        # First pass: recreate the lattice directory from scratch and generate
        # lattices with HDecode; the first-pass output goes to
        # baseline_pass1_mlf.
        logger.info("Start step: %d (%s)" % (current_step, 'Generating lattices with HDecode'))
        if os.path.exists(baseline_lat_dir): shutil.rmtree(baseline_lat_dir)
        os.mkdir(baseline_lat_dir)

        htk.HDecode(current_step, scp_file, si_model, dict_hdecode, phones_list, lm, baseline_lat_dir, num_tokens, baseline_pass1_mlf, [config_hdecode], lm_scale, beam, end_beam, max_pruning)

    # Each following stage bumps the step counter and only runs when the
    # counter has reached options.step.
    current_step += 1
    if current_step >= options.step:
        logger.info("Start step: %d (%s)" % (current_step, 'Rescoring lattices with lattice-tool'))
        # Remove the rescored-lattice directory left by a previous run, if any.
        if os.path.exists(baseline_lat_dir_rescored): shutil.rmtree(baseline_lat_dir_rescored)
        # The rescoring LM is passed with a '.gz' suffix appended.
        htk.lattice_rescore(current_step, baseline_lat_dir, baseline_lat_dir_rescored, lm_rescore + '.gz', lm_scale)

    current_step += 1
    if current_step >= options.step:
        logger.info("Start step: %d (%s)" % (current_step, 'Decoding lattices with lattice-tool'))
        # Decodes the rescored lattices; the result goes to baseline_pass2_mlf.
        htk.lattice_decode(current_step,baseline_lat_dir_rescored, baseline_pass2_mlf, lm_scale)


    # Not guarded by the step check: both MLFs are converted on every run of
    # this section (presumably MLF -> TRN transcripts, judging by the helper
    # name -- confirm in data_manipulation).
    data_manipulation.mlf_to_trn(baseline_pass1_mlf, pass1_trn, speaker_name_width)
    data_manipulation.mlf_to_trn(baseline_pass2_mlf, pass2_trn, speaker_name_width)

# Directory layout for the 'unsupsi' experiment; the lattice directories are
# defined unconditionally, whether or not the experiment is selected.
unsupsi_dir = 'unsup_si'
unsupsi_lat_dir = unsupsi_dir + '/lattices.htk'
unsupsi_lat_dir_rescored = unsupsi_dir + '/lattices.rescored'

# Only set up the experiment's MLF/TRN paths when 'unsupsi' was requested.
if 'unsupsi' in experiments:

    adapt_mlf = unsupsi_dir + '/adapt.mlf'
    pass1_mlf = unsupsi_dir + '/pass1.mlf'
    pass2_mlf = unsupsi_dir + '/pass2.mlf'
    pass1_trn = unsupsi_dir + '/pass1.trn'
Ejemplo n.º 4
0
    def run(self):
        """Run this recognition experiment inside the ``self.name`` directory.

        The method copies the recognition SCP into the working directory,
        applies every entry of ``self.adaptations`` in order (each one
        receives the transform returned by the previous one), generates
        lattices with ``htk.HDecode`` and then:

        * if the model configuration has an ``lm_rescore`` value, rescores
          and decodes the lattices and converts ``rescore.mlf`` to
          ``recog.trn``;
        * otherwise rewrites ``hdecode.mlf`` into ``rescore.mlf`` (inserting a
          ``</s>`` line before each ``.`` terminator that lacks one and
          replacing the lattice directory prefix by ``*``) and converts
          ``hdecode.mlf`` to ``recog.trn``.

        Sets ``self.done`` to True on success and to False when ``Experiment``
        is raised inside the run. Returns None.
        """
        work_dir = self.name

        xforms_dir = work_dir + '/xforms'
        classes_dir = work_dir + '/classes'
        files_dir = work_dir + '/files'

        model = (self.model.configuration['model_dir'] + '/' +
                 self.configuration['model_name'])
        recog_scp = work_dir + "/recog.scp"
        # The experiment's own SCP replaces the model's one unless its value
        # starts with '_'.
        rscp = self.model.configuration['recognize_scp']
        if not self.configuration['recognize_scp'].startswith('_'):
            rscp = self.configuration['recognize_scp']
        # NOTE(review): this writes into work_dir before the directory loop
        # below has had a chance to create it -- presumably work_dir already
        # exists at this point; confirm against the caller.
        data_manipulation.copy_scp_file(rscp, recog_scp)
        dict_hdecode = self.model.configuration['dict_hdecode']
        tiedlist = self.model.configuration['tiedlist']
        lm = self.model.configuration['lm']
        lm_rescore = self.model.configuration['lm_rescore']
        lm_scale = self.configuration['lm_scale']
        beam = self.configuration['beam']
        end_beam = self.configuration['end_beam']
        max_pruning = self.configuration['max_pruning']
        num_tokens = self.configuration['num_tokens']
        hdecode_mlf = work_dir + '/hdecode.mlf'
        rescore_mlf = work_dir + '/rescore.mlf'
        recog_trn = work_dir + '/recog.trn'
        configs = [self.model.configuration['config']]
        # Read early so that a missing key fails before any work is done.
        speaker_name_width = self.model.configuration['speaker_name_width']

        try:
            htk_lat_dir = os.path.join(work_dir, 'lattices.htk')
            rescore_lat_dir = os.path.join(work_dir, 'lattices.rescore')
            log_dir = os.path.join(work_dir, 'log')
            # work_dir comes first so the sub-directories can be created.
            for directory in [
                    work_dir, htk_lat_dir, rescore_lat_dir, log_dir,
                    xforms_dir, classes_dir, files_dir
            ]:
                if not os.path.exists(directory):
                    os.mkdir(directory)

            # Chain the adaptations: each one gets the previous result.
            current_parent_transform = None
            for i, adaptation in enumerate(self.adaptations):
                current_parent_transform = adaptation.make_adaptation(
                    i, current_parent_transform, self)

            adap_dirs = None
            # Re-read the default after the adaptations ran, as the original
            # code did, in case one of them touched the model configuration.
            speaker_name_width = self.model.configuration['speaker_name_width']
            if current_parent_transform is not None:
                # The transform is indexed as
                # (extension, config, speaker_name_width).
                configs.append(current_parent_transform[1])
                extension = current_parent_transform[0]
                adap_dirs = [(xforms_dir, extension), (classes_dir, None)]
                speaker_name_width = current_parent_transform[2]

            # print(<single string>) behaves the same as the print statement
            # on Python 2 and also parses on Python 3.
            print("Start step: %d (%s)" %
                  (0, 'Generating lattices with HDecode'))
            htk.HDecode(log_dir, recog_scp, model, dict_hdecode, tiedlist, lm,
                        htk_lat_dir, num_tokens, hdecode_mlf, configs,
                        lm_scale, beam, end_beam, max_pruning, adap_dirs,
                        speaker_name_width)

            if lm_rescore is not None:
                print("Start step: %d (%s)" %
                      (0, 'Rescoring lattices with lattice-tool'))
                htk.lattice_rescore(log_dir, htk_lat_dir, rescore_lat_dir,
                                    lm_rescore + '.gz', lm_scale)

                print("Start step: %d (%s)" %
                      (0, 'Decoding lattices with lattice-tool'))
                htk.lattice_decode(log_dir, rescore_lat_dir, rescore_mlf,
                                   lm_scale)
                data_manipulation.mlf_to_trn(
                    rescore_mlf, recog_trn,
                    self.model.configuration['speaker_name_width'])
            else:
                # No rescoring LM: derive rescore.mlf from the HDecode output.
                # Both files are opened via ``with`` so the input handle is
                # closed as well (the original leaked it).
                with open(rescore_mlf, 'w') as out_file:
                    with open(hdecode_mlf) as in_file:
                        prev = ""
                        for line in in_file:
                            stripped = line.rstrip()
                            # Insert a '</s>' line before a '.' terminator
                            # whose preceding line is not already '</s>'.
                            if stripped == '.' and prev != '</s>':
                                out_file.write("</s>\n")
                            out_file.write(
                                stripped.replace(htk_lat_dir, '*') + "\n")
                            prev = stripped
                # NOTE(review): the transcript is built from the raw
                # hdecode_mlf, not from the rewritten rescore_mlf -- confirm
                # that this is intended.
                data_manipulation.mlf_to_trn(
                    hdecode_mlf, recog_trn,
                    self.model.configuration['speaker_name_width'])

            self.done = True
        except Experiment:
            # NOTE(review): only ``Experiment`` is caught here; any other
            # exception propagates and leaves self.done unchanged.
            self.done = False