Example #1
    def do_training(self, speech_corpus, text_corpus):

        if self.trained:
            print('NNDurationPredictor already trained')
            return

        print('Training of NNDurationPredictor itself not supported within Ossian -- '
              'use Merlin to train on the prepared data')
        if not os.path.isdir(self.model_dir):
            os.makedirs(self.model_dir)

        ## TODO: refactor to share the block below and write_merlin_config between
        ## NNDurationPredictor and NNAcousticPredictor

        ### Write merlin training list:
        utts_to_use = []
        for utterance in speech_corpus:
            if utterance.has_external_data(self.input_label_filetype):
                utts_to_use.append(utterance.get("utterance_name"))
        writelist(utts_to_use, os.path.join(self.model_dir, 'filelist.txt'))
        n_utts = len(utts_to_use)

        self.write_merlin_config(n_utts=n_utts)
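
These examples lean on a small writelist() helper from Ossian's utilities. As a rough sketch of its behavior (an assumption for illustration, not Ossian's actual code), it writes one item per line, with uni=True selecting UTF-8 text output:

    def writelist(items, path, uni=False):
        ## Hypothetical stand-in for Ossian's writelist(): one item per line.
        kwargs = {'encoding': 'utf-8'} if uni else {}
        with open(path, 'w', **kwargs) as f:
            for item in items:
                f.write('%s\n' % item)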
Example #2
    def process_utterance(self, utt, make_label=True):

        utt_data = []

        utt_questions = defaultdict(int)  ## question name -> count

        nodelist = utt.xpath(self.target_nodes)
        if not nodelist:
            print('WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' %
                  (self.target_nodes))

        for node in nodelist:
            node_data, node_questions = self.get_node_context_label(node)
            utt_data.append(node_data)

            ##utt_questions.update(node_questions)
            ## Sum the dictionaries' values:
            for question in node_questions:
                utt_questions[question] += node_questions[question]

        if make_label:
            label_file = utt.get_filename(self.output_filetype)
            if self.binary_output:
                utt_data = [line.split(' ') for line in utt_data]
                ## If any string data is present, the following line will raise:
                ## ValueError: could not convert string to float: a
                utt_data = numpy.array(utt_data, dtype='float')
                put_speech(utt_data, label_file)
            else:
                writelist(utt_data, label_file, uni=True)

        return (utt_data, utt_questions)  ## for writing utterance-level labels
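
The binary branch above hands the float array to put_speech(), and later examples read features back with get_speech(). A minimal sketch of what such helpers plausibly do (assumed behavior, not Ossian's actual implementation) is flat float32 binary I/O:

    import numpy

    def put_speech(data, path):
        ## Assumed: dump a 2-D array as flat float32 binary.
        numpy.asarray(data, dtype='float32').tofile(path)

    def get_speech(path, dimension):
        ## Assumed: read flat float32 binary back as frames of width `dimension`.
        return numpy.fromfile(path, dtype='float32').reshape(-1, dimension)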
Example #3
    def load(self):
    
        self.target_nodes = self.config.get('target_nodes', '//utt')    
        self.input_attribute = self.config.get('input_attribute', 'norm_text')
        
        self.merge_clitics = self.config.get('merge_clitics', 'True') ## string, not bool
    
        ## check tools exist:
        corenlp_location = os.path.join(self.voice_resources.path[c.BIN], '..', \
                                                            'corenlp-python', 'corenlp')
        assert os.path.isdir(corenlp_location)
        sys.path.append(corenlp_location)
        from corenlp import StanfordCoreNLP
        corenlp_dir = os.path.join(corenlp_location, '..', 'stanford-corenlp-full-2014-06-16')
        
        ## Each document is to be treated as one sentence, no sentence splitting at all. 
        ## Write config for this if necessary:
        corenlp_conf_name = 'no_sentence_split.properties'
        corenlp_conf_file = os.path.join(corenlp_location, corenlp_conf_name)
        if not os.path.isfile(corenlp_conf_file):
            data = ['annotators = tokenize, ssplit, pos, lemma, ner, parse, dcoref', \
                    'ssplit.isOneSentence = true']
            writelist(data, corenlp_conf_file)

        print('Loading stanford corenlp modules from %s ...' % (corenlp_dir))
        print('Takes a while (~20-30 seconds)...')
        self.models = StanfordCoreNLP(corenlp_dir, properties=corenlp_conf_name)
Example #4
    def process_utterance(self, utt, make_label=True):

        utt_data = []
        utt_questions = defaultdict(int)

        nodelist = utt.xpath(self.config["target_nodes"])
        if not nodelist:
            print('WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' %
                  (self.config["target_nodes"]))

        for node in nodelist:

            self.htk_state_xpath = None  ## make sure these are None
            self.start_time_xpath = None
            self.end_time_xpath = None

            ## context label for the phone itself:
            node_data, node_questions = self.get_node_context_label(node)

            statelist = node.xpath('.//' + self.state_tag)
            assert statelist != []
            for (i, state) in enumerate(statelist):

                state_ix = i + 2
                state_node_data = "%s[%s]" % (node_data, state_ix)

                start_time = state.attrib.get(self.start_attribute,
                                              '_NA_')  ## no time at runtime!
                end_time = state.attrib.get(self.end_attribute, '_NA_')

                if not (start_time == "_NA_" or end_time == "_NA_"):

                    start_time = str(ms_to_htk(start_time)).ljust(10)
                    end_time = str(ms_to_htk(end_time)).ljust(10)

                    state_node_data = "%s %s %s" % (start_time, end_time,
                                                    state_node_data)

                utt_data.append(state_node_data)

            ##utt_questions.update(node_questions)
            ## Sum the dictionaries' values:
            for question in node_questions:
                utt_questions[question] += node_questions[question]

        if make_label:
            label_file = utt.get_filename(self.config["output_filetype"])
            writelist(utt_data, label_file, uni=True)

        return (utt_data, utt_questions)  ## for writing utterance-level labels
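
The ms_to_htk() call above converts attribute times into HTK label units, which are expressed in steps of 100 ns. A plausible sketch of the conversion (an assumption; the real helper lives in Ossian's utilities):

    def ms_to_htk(ms):
        ## 1 ms = 10,000 units of 100 ns, HTK's label time resolution.
        return int(float(ms) * 10000)

    print(ms_to_htk(25.0))  ## 250000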
Example #5
    def make_simple_continuous_questions(self, outfile):

        cont_qlist = []  ## write continuous questions about numerical features  
        key_list = []

        for (number, name) in enumerate(self.mapped_feature_names):
            ## NB_  special regex to handle decimal point! --
            cont_qlist.append("CQS %s {*/%s:([\d\.]+)/*}" % (name, number))
            key_list.append("/%s:\t%s" % (number, name))

        writelist(cont_qlist, outfile + '.cont', uni=True)

        key_file = outfile + ".key"
        writelist(key_list, key_file, uni=True)
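
To make the CQS format concrete, this self-contained snippet (with made-up feature names) prints the lines the loop above would produce:

    mapped_feature_names = ['duration_z', 'pitch_mean']  ## hypothetical features
    for number, name in enumerate(mapped_feature_names):
        print("CQS %s {*/%s:([\\d\\.]+)/*}" % (name, number))
    ## CQS duration_z {*/0:([\d\.]+)/*}
    ## CQS pitch_mean {*/1:([\d\.]+)/*}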
Example #6
    def process_utterance(self, utt):
        # print('!!! in MappedFeatureDumper::process_utterance')
        utt_data = []

        nodelist = utt.xpath(self.target_nodes)
        if not nodelist:
            print('WARNING: FeatureDumper\'s target_nodes matches no nodes: %s' %
                  (self.target_nodes))

        for node in nodelist:
            node_data = self.get_node_context_label(node)
            utt_data.append(node_data)

        label_file = utt.get_filename(self.output_filetype)
        writelist(utt_data, label_file, uni=True)
Example #7
    def do_training(self, corpus, text_corpus):

        dict_location = os.path.join(self.voice_resources.path[c.LANG],
                                     'labelled_corpora', self.dictionary)

        ## phoneset
        phonetable_files = glob.glob(os.path.join(dict_location, '*.table'))
        if phonetable_files == []:
            sys.exit('Cannot find any phone table files at %s' %
                     (os.path.join(dict_location, '*.table')))
        phonetable_file = phonetable_files[0]  ## take first
        shutil.copy(phonetable_file, self.phoneset_fname)
        ## load phoneset now for converting lexicon:
        self.phoneset = LookupTable(self.phoneset_fname, is_phoneset=True)

        ## letter pronunciations
        letter_file = os.path.join(dict_location, 'letter.names')
        assert os.path.isfile(letter_file)
        shutil.copy(letter_file, self.letter_fname)
        self.load_letternames()  # populate self.letternames

        ## lexicon
        ## Filter with endswith() since glob doesn't support {} for .{out,scm};
        ## cmudict extensions (.scm) are deliberately excluded:
        dict_files = [f for f in glob.glob(os.path.join(dict_location, '*'))
                      if f.endswith('.out')]

        assert dict_files != [], 'No lexicon files found at %s' % (
            dict_location)
        self.convert_lexicon(dict_files)

        ## onsets
        self.count_onsets_and_codas()
        onset_strings = [' '.join(onset) for onset in self.onsets.keys()]
        writelist(onset_strings, self.onsets_fname)

        ## G2P
        train_file = os.path.join(self.get_training_dir(), 'train_data.txt')
        self.make_sequitur_train_data(train_file)
        self.train_sequitur_g2p(train_file)

        ## save it also globally for posterity:-
        if os.path.isdir(self.component_path):
            shutil.rmtree(self.component_path)
        shutil.copytree(self.model_dir, self.component_path)
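
For reference, self.onsets maps onset tuples to counts, so the serialization step writes one space-joined onset per line. A tiny illustration with hypothetical data:

    onsets = {('s', 't', 'r'): 12, ('k',): 40}  ## hypothetical counts
    print([' '.join(onset) for onset in onsets])  ## ['s t r', 'k']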
Example #8
    def do_training(self, speech_corpus, text_corpus):
        """
        "training" an extractor involves writing a config, and establishing the location of resources etc.

        Write also desciption of .cmp files in terms of streams, stream widths to be used by alignment and acoustic model

        完成以下事情:
            1. 写入配置文件acoustic_feats.cfg
            2. 写入.cmp文件
        :param speech_corpus: Utterance列表
        :param text_corpus: 文本列表
        :return: None
        """
        self.acoustic_feats = self.get_location() + "/acoustic_feats.cfg"
        self.tool = self.voice_resources.get_path(c.BIN)

        for toolname in ['analysis', 'synth']:
            path = os.path.join(self.tool, toolname)
            assert os.path.isfile(path), '%s does not exist' % (path)
            assert os.access(path, os.X_OK), '%s is not executable' % (path)

        self.fftl, self.apsize = get_world_fft_and_apdim(self.sample_rate)

        # make acoustic modelling config
        self.feats = ['mgc', 'lf0', 'bap']

        self.stream_sizes = [str(self.order + 1), '1', str(self.apsize)]
        weights = ['1', '0', '0']
        msd = ['0', '1', '0']
        floor_scale = ["0.01" for x in range(len(self.feats))]

        streams = []

        # modifications for MSD streams
        cur_stream_index = 1
        for i in range(len(self.feats)):
            if msd[i] == "1":
                self.stream_sizes[i] += " 1 1"
                weights[i] += " " + weights[i] + " " + weights[i]
                floor_scale[i] += " " + floor_scale[i] + " " + floor_scale[i]
                msd[i] = "1 1 1"
                streams.append(
                    str(cur_stream_index) + "-" + str(cur_stream_index + 2))
                cur_stream_index += 3
            else:
                streams.append(str(cur_stream_index))
                cur_stream_index += 1

        # save these for acoustic model training with cmps
        with open(self.acoustic_feats, "w") as htk_feats:
            htk_feats.write("STREAMS=\"%s\"\n" % " ".join(streams))
            htk_feats.write("STREAM_NAMES=\"%s\"\n" % " ".join(self.feats))
            ## hardcoded stream group indices; cf. " ".join([str(i+1) for i in range(len(self.feats))])
            htk_feats.write("SHORT_STREAM_NAMES=\"1 2 5\"\n")
            htk_feats.write("STATIC_STREAM_SIZES=\"%s\"\n" %
                            " ".join(self.stream_sizes))
            htk_feats.write("MSD_STREAM_INFO=\"%s\"\n" % " ".join(msd))
            htk_feats.write("STREAM_WEIGHTS=\"%s\"\n" % " ".join(weights))
            htk_feats.write("VFLOORSCALESTR=\"Vector %d %s\"\n" %
                            (len(" ".join(self.stream_sizes).split()),
                             " ".join(floor_scale)))

        ## Make delta window coefficients in config file into separate files:
        training_dir = self.get_training_dir()
        self.winfiles = []
        for window in ['static_window', 'delta_window', 'delta_delta_window']:
            fname = os.path.join(training_dir, window + '.win')
            data = self.coding_config[window]
            length = len(data.strip().split())
            data = '%s %s' % (length, data)
            writelist([data], fname)
            self.winfiles.append(fname)
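
The MSD bookkeeping above is easiest to follow in isolation. This standalone sketch reproduces just the stream-index logic and shows why SHORT_STREAM_NAMES is hardcoded to "1 2 5": lf0 is the only MSD stream, so it expands to three HTK streams (2-4) and pushes bap to index 5:

    feats = ['mgc', 'lf0', 'bap']
    msd = ['0', '1', '0']  ## only lf0 is multi-space (voiced/unvoiced)
    streams, cur = [], 1
    for flag in msd:
        if flag == '1':
            streams.append('%d-%d' % (cur, cur + 2))  ## MSD stream takes 3 slots
            cur += 3
        else:
            streams.append(str(cur))
            cur += 1
    print(streams)  ## ['1', '2-4', '5'] -- first indices: 1, 2, 5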
Example #9
    def process_utterance(self, utt):

        ## If there is no waveform attached to the utt, don't do anything:
        if not utt.has_attribute("waveform"):
            return

        ## Add some data to the utt structure recording the structure of the
        ## associated acoustic features we've produced. Do this first, in case
        ## we use existing features.
        self.stream_sizes[1] = '1'  ## otherwise '1 1 1' for F0    TODO: fix this nicely!
        utt.add_acoustic_stream_info(self.feats, self.stream_sizes)

        ## If a feature file already exists, skip:
        if utt.has_external_data(self.output_filetype):
            ##  TODO: check description against existing feats?
            return

        ## else extract features
        infile = utt.get("waveform")
        outfile = utt.get_filename(self.output_filetype)

        ## strip suffix .cmp:-
        assert outfile.endswith('.' + self.output_filetype)
        chars_to_strip = len(self.output_filetype) + 1
        outstem = outfile[:-chars_to_strip]

        rate = self.rate
        sample_rate = self.rate
        alpha = self.alpha
        order = self.order
        fftl = self.fftl
        apsize = self.apsize
        frameshift_ms = self.frameshift_ms

        script_dir = self.voice_resources.path[c.SCRIPT]

        ## 1) remove wave header, downsample etc. with sox:
        comm = "sox -t wav " + infile
        comm += " -c 1 -e signed-integer "
        comm += " -r %s" % (rate)
        comm += " -b 16 "
        comm += " " + outstem + ".wav"
        comm += " dither"  ## added for hi and rj data blizz 2014
        success = os.system(comm)
        if success != 0:
            print('sox failed on utterance ' + utt.get("utterance_name"))
            return

        comm = "%s/analysis %s.wav %s.f0.double %s.sp.double %s.bap.double > %s.log" % (
            self.tool, outstem, outstem, outstem, outstem, outstem)
        success = os.system(comm)  # This command is very slow
        # print(comm)
        if success != 0:
            print('world analysis failed on utterance ' +
                  utt.get("utterance_name"))
            return

        if self.resynthesise_training_data:
            ## resynthesis to test
            comm = "%s/synth %s %s %s.f0.double %s.sp.double %s.bap.double %s.resyn.wav > %s.log" % (
                self.tool, fftl, rate, outstem, outstem, outstem, outstem,
                outstem)
            success = os.system(comm)
            if success != 0:
                print('world synthesis failed on utterance ' +
                      utt.get("utterance_name"))
                return

        comm = "%s/x2x +df %s.sp.double | %s/sopr -R -m 32768.0 | %s/mcep -a %s -m %s -l %s -j 0 -f 0.0 -q 3 > %s.mgc" % (
            self.tool, outstem, self.tool, self.tool, alpha, order, fftl,
            outstem)
        ## -e 1.0E-8
        success = os.system(comm)  # This command is very slow
        if success != 0:
            print('conversion of world spectrum to mel cepstra failed on utterance ' +
                  utt.get("utterance_name"))
            return

        for stream in ['bap']:
            comm = "%s/x2x +df %s.%s.double > %s.%s" % (
                self.tool, outstem, stream, outstem, stream)
            success = os.system(comm)
            if success != 0:
                print('double -> float conversion (stream: ' + stream +
                      ') failed on utterance ' + utt.get("utterance_name"))
                return

        for stream in ['f0']:
            comm = "%s/x2x +da %s.%s.double > %s.%s.txt" % (
                self.tool, outstem, stream, outstem, stream)
            success = os.system(comm)
            if success != 0:
                print('double -> ascii conversion (stream: ' + stream +
                      ') failed on utterance ' + utt.get("utterance_name"))
                return

        ## 5) F0 conversion:
        f0 = [float(val) for val in readlist(outstem + '.f0.txt')]
        log_f0 = []
        for val in f0:
            if val == 0.0:
                log_f0.append('-1.0E10')
            else:
                log_f0.append(math.log(val))
        writelist(log_f0, outstem + '.f0.log')

        comm = "%s/x2x +af %s.f0.log > %s.lf0" % (self.tool, outstem, outstem)
        success = os.system(comm)
        if success != 0:
            print('writing log f0 failed on utterance ' +
                  utt.get("utterance_name"))
            return

        ## add mcep/ap/f0 deltas:
        for (stream, dimen) in [('mgc', order + 1), ('bap', apsize),
                                ('lf0', 1)]:
            comm = "perl %s/window.pl %s " % (script_dir, dimen)
            comm += "%s.%s %s > %s.%s.delta" % (outstem, stream, ' '.join(
                self.winfiles), outstem, stream)
            success = os.system(comm)  # This command is very slow
            if success != 0:
                print('delta (' + stream + ') extraction failed on utterance ' +
                      utt.get("utterance_name"))
                return

        ### combined streams:--
        ap = get_speech(outstem + '.bap.delta', apsize * len(self.winfiles))
        mgc = get_speech(outstem + '.mgc.delta',
                         (order + 1) * len(self.winfiles))
        lf0 = get_speech(outstem + '.lf0.delta', 1 * len(self.winfiles))
        cmp = numpy.hstack([mgc, lf0, ap])
        put_speech(cmp, outfile)

        ## 7) add header
        floats_per_frame = (order + 2 + apsize) * len(
            self.winfiles)  ## +2 for energy and F0
        add_htk_header(outfile, floats_per_frame, frameshift_ms)

        ## 8) tidy:
        self.extensions_to_keep = ['.' + self.output_filetype,
                                   '.f0.txt']  ## TODO: make configurable?
        self.extensions_to_keep.append('.resyn.wav')
        self.extensions_to_keep.extend(['.mgc', '.bap', '.lf0'])

        keepfiles = [outstem + ending for ending in self.extensions_to_keep]

        for junk in glob.glob(outstem + '.*'):
            if junk not in keepfiles:
                os.remove(junk)
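
Step 5's log-F0 conversion is worth seeing on its own: unvoiced frames (F0 == 0) are written as the marker value -1.0E10, voiced frames as natural log F0. A minimal standalone version of that loop:

    import math

    def to_log_f0(f0_values):
        ## Unvoiced frames get the -1.0E10 marker, as in the loop above.
        return ['-1.0E10' if v == 0.0 else str(math.log(v)) for v in f0_values]

    print(to_log_f0([0.0, 120.0]))  ## ['-1.0E10', '4.787491742782046']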
Example #10
    def format_question_set(self, raw_questions, outfile):
        """
        Take raw_questions: list of (number, name, value) triplets, ...

        Write formatted questions to outfile, and human-readable key to outfile.key

        Additionally, write question file including continuous questions (CQS) for
        DNN training
        Saves the question sets to the following paths:
            1. qlist to outfile
            2. cont_qlist to outfile + ".cont"
            3. key_list to outfile + ".key"
            4. values_list to outfile + ".values"

        :param raw_questions: list of questions, e.g.: [(0, 'segment', u'_CJKUNIFIEDIDEOGRAPHEIGHTZEROCE_'),
                              (0, 'segment', u'_CJKUNIFIEDIDEOGRAPHFIVEDFFOUR_'),
                              (0, 'segment', u'_CJKUNIFIEDIDEOGRAPHEIGHTFOURSEVENFIVE_'), ...]
        :param outfile: output path
        :return: None
        """
        # Keyed by (number, name); collect the distinct values each feature takes
        unique_questions = {}
        for (number, name, value) in raw_questions:
            if (number, name) not in unique_questions:
                unique_questions[(number, name)] = []
            if value not in unique_questions[(number, name)]:
                unique_questions[(number, name)].append(value)

        qlist = []
        cont_qlist = []  ## write continuous questions about numerical features  
        key_list = []  ## To make human-readable key to the feature set
        values_list = []  ## To make reference list of all values taken by a feature  

        for ((number, name), values) in sorted(unique_questions.items()):
            values.sort()
            key_list.append((number, name))

            NA_present = False
            if '_NA_' in values:
                values.remove('_NA_')
                NA_present = True

            if all_entries_of_type(values, str):
                ## For strings, make single question for each item, no groups:
                for value in values:
                    qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
                    cont_qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
                values_list.append((number, name, 'CATEGORICAL', ' '.join(values)))

            elif all_entries_of_type(values, unicode):
                ## For strings, make single question for each item, no groups:
                for value in values:
                    qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
                    cont_qlist.append("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
                values_list.append((number, name, 'CATEGORICAL', ' '.join(values)))

            elif all_entries_of_type(values, int):
                ## For integers, make single question for each item, and also groups 
                ## based on single splits of the range.
                ## Aug 2014: modified -- just use split points -- questions based on 
                ## single values are too arbitrary.
                #
                # for value in values:
                #    qlist.append("QS %s_is_%s {*/%s:%s/*}"%(name, value, number, value))

                values_list.append((number, name, 'NUMERIC', 'MAX:' + str(max(values))))

                cont_qlist.append("CQS %s {*/%s:(\d+)/*}" % (name, number))

                qlist.extend([""])  ## for formatting of final file
                for split_point_ix in range(1, len(values)):
                    split_point = values[split_point_ix]

                    wildcard_values = make_htk_wildcards(split_point)
                    formatted_sublist = ["/%s:%s/" % (number, value) for value in wildcard_values]
                    formatted_sublist = "*,*".join(formatted_sublist)

                    qlist.append("QS %s_<_%s {*%s*}" % (name, split_point, formatted_sublist))

            elif all_entries_of_type(values, float):
                ## floats -- only make CQS

                values_list.append((number, name, 'NUMERIC', 'MAX:' + str(max(values))))

                ## NB_  special regex to handle decimal point! --
                cont_qlist.append("CQS %s {*/%s:([\d\.]+)/*}" % (name, number))



            else:
                print "Feature values of mixed type / not string or int:"
                print values
                sys.exit(1)

            if NA_present:
                qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" % (name, number))
                cont_qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" % (name, number))

            qlist.extend(["", "", ""])  ## for formatting of final file
            cont_qlist.extend(["", "", ""])  ## for formatting of final file

        writelist(qlist, outfile, uni=True)
        writelist(cont_qlist, outfile + '.cont', uni=True)

        key_list = ["/%s:\t%s" % (number, name) for (number, name) in key_list]
        key_file = outfile + ".key"
        writelist(key_list, key_file, uni=True)

        values_list = ["%s\t%s\t%s\t%s" % (number, name, feat_type, values) \
                       for (number, name, feat_type, values) in values_list]
        values_file = outfile + ".values"
        writelist(values_list, values_file, uni=True)
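
As a concrete illustration of the categorical branch, here is what the QS lines look like for a hypothetical feature (number=0, name='segment') taking values ['a', 'b']:

    number, name = 0, 'segment'  ## hypothetical feature
    for value in ['a', 'b']:
        print("QS %s_is_%s {*/%s:%s/*}" % (name, value, number, value))
    ## QS segment_is_a {*/0:a/*}
    ## QS segment_is_b {*/0:b/*}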
Example #11
    def format_question_set(self, raw_questions, outfile):
        """
        Take raw_questions: list of (number, name, value) triplets, ...
        
        Write formatted questions to outfile, and human-readable key to outfile.key
        
        Additionally, write question file including continuous questions (CQS) for 
        DNN training
        """
        print(raw_questions)
        unique_questions = {}
        for (number, name, value) in raw_questions:
            if (number, name) not in unique_questions:
                unique_questions[(number, name)] = []
            if value not in unique_questions[(number, name)]:
                unique_questions[(number, name)].append(value)

        qlist = []
        cont_qlist = []  ## write continuous questions about numerical features
        key_list = []  ## To make human-readable key to the feature set
        values_list = []  ## To make reference list of all values taken by a feature

        for ((number, name), values) in sorted(unique_questions.items()):
            values.sort()
            key_list.append((number, name))

            NA_present = False
            if '_NA_' in values:
                values.remove('_NA_')
                NA_present = True

            if all_entries_of_type(values, str):
                ## For strings, make single question for each item, no groups:
                for value in values:
                    qlist.append("QS %s_is_%s {*/%s:%s/*}" %
                                 (name, value, number, value))
                    cont_qlist.append("QS %s_is_%s {*/%s:%s/*}" %
                                      (name, value, number, value))
                values_list.append(
                    (number, name, 'CATEGORICAL', ' '.join(values)))

            elif all_entries_of_type(values, unicode):
                ## For strings, make single question for each item, no groups:
                for value in values:
                    qlist.append("QS %s_is_%s {*/%s:%s/*}" %
                                 (name, value, number, value))
                    cont_qlist.append("QS %s_is_%s {*/%s:%s/*}" %
                                      (name, value, number, value))
                values_list.append(
                    (number, name, 'CATEGORICAL', ' '.join(values)))

            elif all_entries_of_type(values, int):
                ## For integers, make single question for each item, and also groups
                ## based on single splits of the range.
                ## Aug 2014: modified -- just use split points -- questions based on
                ## single values are too arbitrary.
                #
                #for value in values:
                #    qlist.append("QS %s_is_%s {*/%s:%s/*}"%(name, value, number, value))

                values_list.append(
                    (number, name, 'NUMERIC', 'MAX:' + str(max(values))))

                cont_qlist.append("CQS %s {*/%s:(\d+)/*}" % (name, number))

                qlist.extend([""])  ## for formatting of final file
                for split_point_ix in range(1, len(values)):

                    split_point = values[split_point_ix]

                    wildcard_values = make_htk_wildcards(split_point)
                    formatted_sublist = [
                        "/%s:%s/" % (number, value)
                        for value in wildcard_values
                    ]
                    formatted_sublist = "*,*".join(formatted_sublist)

                    qlist.append("QS %s_<_%s {*%s*}" %
                                 (name, split_point, formatted_sublist))

            elif all_entries_of_type(values, float):
                ## floats -- only make CQS

                values_list.append(
                    (number, name, 'NUMERIC', 'MAX:' + str(max(values))))

                ## NB_  special regex to handle decimal point! --
                cont_qlist.append("CQS %s {*/%s:([\d\.]+)/*}" % (name, number))

            else:
                print "Feature values of mixed type / not string or int:"
                print values
                sys.exit(1)

            if NA_present:
                qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" % (name, number))
                cont_qlist.append("QS %s_is__NA_ {*/%s:_NA_/*}" %
                                  (name, number))

            qlist.extend(["", "", ""])  ## for formatting of final file
            cont_qlist.extend(["", "", ""])  ## for formatting of final file

        writelist(qlist, outfile, uni=True)
        writelist(cont_qlist, outfile + '.cont', uni=True)

        key_list = ["/%s:\t%s" % (number, name) for (number, name) in key_list]
        key_file = outfile + ".key"
        writelist(key_list, key_file, uni=True)

        values_list = ["%s\t%s\t%s\t%s" % (number, name, feat_type, values)
                       for (number, name, feat_type, values) in values_list]
        values_file = outfile + ".values"
        writelist(values_list, values_file, uni=True)

        if self.processor_name != 'labelmaker': return

        transliterate = {
            "क": "k",
            "ख": "kh",
            "ग": "g",
            "घ": "gh",
            "ङ": "N1",
            "च": "c",
            "छ": "ch",
            "ज": "j",
            "झ": "jh",
            "ञ": "N2",
            "ट": "T",
            "ठ": "Th",
            "ड": "D",
            "ढ": "Dh",
            "ण": "N3",
            "त": "t",
            "थ": "th",
            "द": "d",
            "ध": "dh",
            "न": "n",
            "प": "p",
            "फ": "ph",
            "ब": "b",
            "भ": "bh",
            "म": "m",
            "य": "y",
            "र": "r",
            "ल": "l",
            "ळ": "L",
            "व": "v",
            "श": "s1",
            "ष": "s2",
            "स": "s",
            "ह": "h",
            "ं": "M",
            "ः": "H",
            "अ": "a",
            "आ": "A",
            "इ": "i",
            "ई": "I",
            "उ": "u",
            "ऊ": "U",
            "ऋ": "R",
            "ॠ": "RR",
            "ऌ": "l1",
            "ॡ": "l2",
            "ए": "e",
            "ऐ": "ai",
            "ओ": "o",
            "औ": "au",
            "लँ": "ln"
        }
        l = []
        l.append([transliterate[i] for i in 'अ इ उ ऋ ऌ'.split(' ')])
        l.append([transliterate[i] for i in 'आ ई ऊ ॠ ॡ ए ओ'.split(' ')])
        l.append([
            transliterate[i] for i in
            'क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल ळ व श ष स ह'
            .split(' ')
        ])
        l.append([
            transliterate[i]
            for i in 'क ख ग घ च छ ज झ ट ठ ड ढ त थ द ध प फ ब भ'.split(' ')
        ])
        #l.append([transliterate[i]+'x' for i in 'क ख ग घ च छ ज झ ट ठ ड ढ त थ द ध प फ ब भ'.split(' ')])
        l += [['k', 'g'], ['kh', 'gh'], ['k', 'kh'], ['g', 'gh']]
        l += [['c', 'j'], ['ch', 'jh'], ['c', 'ch'], ['j', 'jh']]
        l += [['T', 'D'], ['Th', 'Dh'], ['T', 'Th'], ['D', 'Dh']]
        l += [['t', 'd'], ['th', 'dh'], ['t', 'th'], ['d', 'dh']]
        l += [['p', 'b'], ['ph', 'bh'], ['p', 'ph'], ['b', 'bh']]
        l += [['N1', 'N2', 'N3', 'n', 'm']]
        l += [['h', 'H']]
        l += [['a', 'A']]
        l += [['i', 'I', 'y']]
        l += [['u', 'U', 'v']]
        l += [['R', 'RR', 'r']]
        l += [['l1', 'l2', 'l']]
        i = 0
        s = '\n'
        for p in range(0, 5):
            for j in l:
                s += 'QS q' + str(i) + ' {'
                for k in range(0, len(j)):
                    if k != 0: s += ','
                    s += '*/' + str(p) + ':' + j[k] + '/*'
                s += '}\n'
                i += 1

        print(s)
        with open(outfile, 'a') as f:
            f.write(s)
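
To see the shape of the appended phone-class questions, this snippet reproduces a single line from the nested loop above; for p == 0, the fifth entry of l is the voicing pair ['k', 'g'], which receives question index 4:

    p, i, phone_class = 0, 4, ['k', 'g']  ## values taken from the loop above
    print('QS q%d {%s}' % (i, ','.join('*/%d:%s/*' % (p, ph) for ph in phone_class)))
    ## QS q4 {*/0:k/*,*/0:g/*}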