Example #1
0
 def __init__(self, lab, args):
   """Build an utterance from the given input.

   lab - the input to build from; its expected format depends on
         args.intype ("align_mlf", "hts_mlf" or "txt").
   args - options object; must provide intype and phoneme_features, and
          stanfordparse/txtdir/parsedict when parse info should be loaded.

   Exits the process with a non-zero status if args.intype is unknown.
   """
   #Make a proto utt from the input
   if args.intype == "align_mlf":
     proto = utterance_load.proto_from_align_lab(lab)
     self.txtloaded = False
   elif args.intype == "hts_mlf":
     proto = utterance_load.proto_from_hts_lab(lab)
     self.txtloaded = False
   elif args.intype == "txt":
     proto = utterance_load.proto_from_txt(lab, args)
     self.txtloaded = True
   else:
     print "Error: Don't know what to do with intype - {0}".format(args.intype)
     #Bug fix: sys.exit() with no argument exits with status 0 (success),
     #so callers/scripts could not detect this failure. Exit non-zero.
     sys.exit(1)

   #Construct the utt from the proto utt
   self.id = proto["id"]
   self.phonemes = []
   self.syllables = []
   self.words = []
   #We need to know which phoneme features this utterance is created with.
   self.phoneme_features = args.phoneme_features
   #Running positions of the current syllable/phoneme within the whole utterance.
   s_utt_pos = 0
   p_utt_pos = 0

   for wi, proto_word in enumerate(proto["utt"]):
     #Position of the current phoneme within the current word.
     p_word_pos = 0
     word = Word()
     word.load_from_proto(proto_word, wi, p_utt_pos, s_utt_pos, len(proto["utt"]), self)
     self.words.append(word)
     for si, proto_syll in enumerate(proto_word["syllables"]):
       syll = Syllable()
       syll.load_from_proto(proto_syll, p_utt_pos, si, s_utt_pos, word, self)
       self.syllables.append(syll)
       for pi, proto_phone in enumerate(proto_syll["phonemes"]):
         phoneme = Phoneme()
         phoneme.load_from_proto(proto_phone, pi, p_utt_pos, p_word_pos, syll, word, self)
         self.phonemes.append(phoneme)
         p_word_pos += 1
         p_utt_pos += 1
       s_utt_pos += 1
       #Link the phonemes created above back into the syllable.
       syll.add_phonemes()
     word.add_phonemes()
     word.add_syllables()

   #If we should use the stanford parse info
   if args.stanfordparse:
     #If the input was not txt the word text has not been loaded yet and the
     #parse loading below relies on it.
     if args.intype != "txt":
       utterance_load.load_txt(self, os.path.join(args.txtdir, self.id+".txt"))
     utterance_load.load_stanford_parse(self, args.parsedict[self.id])
Example #2
0
 def __init__(self, lab, args):
   """Build an utterance from the given input.

   lab - the input to build from; its expected format depends on
         args.intype ("align_mlf", "state_align_mlf", "hts_mlf",
         "sire_lab" or "txt").
   args - options object. Must provide intype and either a dictionary or a
          phoneme_features attribute; further attributes are required
          depending on intype and are checked (and defaulted) below.
          NOTE: args is mutated in place when defaults are inserted.

   Raises SiReError if intype is unknown or required options are missing.
   """
   #Make a proto utt from the input
   if args.intype == "align_mlf":
     proto = utterance_load.proto_from_align_lab(lab)
     self.txtloaded = False
   elif args.intype == "state_align_mlf":
     proto = utterance_load.proto_from_state_align_lab(lab)
     self.txtloaded = False
   elif args.intype == "hts_mlf":
     proto = utterance_load.proto_from_hts_lab(lab)
     self.txtloaded = False
   elif args.intype == "sire_lab":
     #As we need additional information here we check if args contains it
     if not hasattr(args, "context_type"):
       raise SiReError("You're trying to create an utterance from a SiRe label but have not told what kind of positional context_type was used!")
     if not hasattr(args, "HHEd_fix"):
       raise SiReError("You're trying to create an utterance from a SiRe label but have not told if HHEd_fix was used to create the labels!")
     proto = utterance_load.proto_from_sire_lab(lab, args.context_type, args.HHEd_fix)
     self.txtloaded = False
   elif args.intype == "txt":
     #Check if args has all the necessary elements and insert defaults if not.
     if not hasattr(args, 'pron_reduced') or args.pron_reduced == False:
       args.pron_reduced = False
       args.lm_score_dir = None
       args.reduction_level = 1.0
     else:
       #If we are we need to check if we know enough to do it and fail if we don't.
       if not hasattr(args, 'lm_score_dir'):
         raise SiReError("You have asked to produce a reduced phonemisation but no path to a directory containing LM word probabilities to base the reduction on.")
       if not hasattr(args, 'reduction_level'):
         raise SiReError("You have asked to produce a reduced phonemisation but not specified to which degree the sentence should be reduced.")
     if not hasattr(args, 'general_sil_phoneme'):
       print "Warning! args does not tell if there is a standard silence phoneme! Using default... (\"sil\")"
       args.general_sil_phoneme = "sil"
     if not hasattr(args, 'comma_is_pause'):
       print "Warning! args does not tell if commas should be used as pauses! Using default... (no)"
       args.comma_is_pause = False
     if not hasattr(args, 'stanford_pcfg_parse'):
       print "Warning! args does not tell if we are using stanford parsing! Using default... (no)"
       args.stanford_pcfg_parse = False
     if args.stanford_pcfg_parse == False:
       args.pcfgdict = False
     proto = utterance_load.proto_from_txt(lab, args.dictionary, args.general_sil_phoneme, args.comma_is_pause, args.stanford_pcfg_parse, args.pcfgdict, args.pron_reduced, args.lm_score_dir, args.reduction_level)
     self.txtloaded = True
   else:
     raise SiReError("Don't know what to do with intype - {0}".format(args.intype))

   #Construct the utt from the proto utt
   self.id = proto["id"]
   self.phonemes = []
   self.syllables = []
   self.words = []
   #We need to know which phoneme features this utterance is created with.
   #A dictionary (if present) takes precedence over a bare featureset.
   if hasattr(args, 'dictionary'):
     self.phoneme_features = args.dictionary.phoneme_feats
   elif hasattr(args, 'phoneme_features'):
     self.phoneme_features = args.phoneme_features
   else:
     raise SiReError("args does not contain either a dictionary or a phoneme featureset!")
   #Running positions of the current syllable/phoneme within the whole utterance.
   s_utt_pos = 0
   p_utt_pos = 0

   for wi, proto_word in enumerate(proto["utt"]):
     #Position of the current phoneme within the current word.
     p_word_pos = 0
     word = Word()
     word.load_from_proto(proto_word, wi, p_utt_pos, s_utt_pos, len(proto["utt"]), self)
     self.words.append(word)
     for si, proto_syll in enumerate(proto_word["syllables"]):
       syll = Syllable()
       syll.load_from_proto(proto_syll, p_utt_pos, si, s_utt_pos, word, self)
       self.syllables.append(syll)
       for pi, proto_phone in enumerate(proto_syll["phonemes"]):
         phoneme = Phoneme()
         phoneme.load_from_proto(proto_phone, pi, p_utt_pos, p_word_pos, syll, word, self)
         self.phonemes.append(phoneme)
         p_word_pos += 1
         p_utt_pos += 1
       s_utt_pos += 1
       #Link the phonemes created above back into the syllable/word.
       syll.add_phonemes()
     word.add_phonemes()
     word.add_syllables()

   #If we should use the stanford pcfg parse info
   if hasattr(args, 'stanford_pcfg_parse') and args.stanford_pcfg_parse:
     print "Loading stanford pcfg parse info to utt..."
     #Parse loading needs the word text, which is only loaded already when
     #the input type was txt.
     if args.intype != "txt":
       utterance_load.load_txt(self, os.path.join(args.txtdir, self.id+".txt"))
     utterance_load.load_stanford_pcfg_parse(self, args.pcfgdict[self.id], args.comma_is_pause)

   #If we should use the stanford dependency parse info
   if hasattr(args, 'stanford_dependency_parse') and args.stanford_dependency_parse:
     print "Loading stanford dependency parse info to utt..."
     if args.intype != "txt" and self.txtloaded == False:
       utterance_load.load_txt(self, os.path.join(args.txtdir, self.id+".txt"))
     utterance_load.load_stanford_dependency_parse(self, args.dependencydict[self.id])

   #If we output a Festival context set we should modify the UTT a bit further.
   #Right now we use the full festival features as standard, but some operations, like corpus analysis, does not rely on this and it is a nuisance to have the text a requirement so this is still just an option.
   if args.festival_features == True:
     #We need to know the words
     if args.intype != "txt" and self.txtloaded == False:
       utterance_load.load_txt(self, os.path.join(args.txtdir, self.id+".txt"))
     #If we have a pcfg parse we have a proper POS tag mechanism and they have already been added
     if not args.stanford_pcfg_parse:
       pos.simple_festival_pos_predict(self)
     prosody.simple_festival_accent_predict(self)
Example #3
0
    def __init__(self, lab, args):
        """Build an utterance from the given input.

        lab - the input to build from; its expected format depends on
              args.intype ("align_mlf", "state_align_mlf", "hts_mlf",
              "sire_lab" or "txt").
        args - options object. Must provide intype and either a dictionary
               or a phoneme_features attribute; further attributes are
               required depending on intype and are checked (and defaulted)
               below. NOTE: args is mutated in place when defaults are
               inserted.

        Raises SiReError if intype is unknown or required options are
        missing.
        """
        #Make a proto utt from the input
        if args.intype == "align_mlf":
            proto = utterance_load.proto_from_align_lab(lab)
            self.txtloaded = False
        elif args.intype == "state_align_mlf":
            proto = utterance_load.proto_from_state_align_lab(lab)
            self.txtloaded = False
        elif args.intype == "hts_mlf":
            proto = utterance_load.proto_from_hts_lab(lab, args.state_level)
            self.txtloaded = False
        elif args.intype == "sire_lab":
            #As we need additional information here we check if args contains it
            if not hasattr(args, "context_type"):
                raise SiReError(
                    "You're trying to create an utterance from a SiRe label but have not told what kind of positional context_type was used!"
                )
            if not hasattr(args, "HHEd_fix"):
                raise SiReError(
                    "You're trying to create an utterance from a SiRe label but have not told if HHEd_fix was used to create the labels!"
                )
            proto = utterance_load.proto_from_sire_lab(lab, args.context_type,
                                                       args.HHEd_fix)
            self.txtloaded = False
        elif args.intype == "txt":
            #Check if args has all the necessary elements and insert defaults if not.
            if not hasattr(args, 'pron_reduced') or args.pron_reduced == False:
                args.pron_reduced = False
                args.lm_score_dir = None
                args.reduction_level = 1.0
            else:
                #If we are we need to check if we know enough to do it and fail if we don't.
                if not hasattr(args, 'lm_score_dir'):
                    raise SiReError(
                        "You have asked to produce a reduced phonemisation but no path to a directory containing LM word probabilities to base the reduction on."
                    )
                if not hasattr(args, 'reduction_level'):
                    raise SiReError(
                        "You have asked to produce a reduced phonemisation but not specified to which degree the sentence should be reduced."
                    )
            if not hasattr(args, 'general_sil_phoneme'):
                print "Warning! args does not tell if there is a standard silence phoneme! Using default... (\"sil\")"
                args.general_sil_phoneme = "sil"
            if not hasattr(args, 'comma_is_pause'):
                print "Warning! args does not tell if commas should be used as pauses! Using default... (no)"
                args.comma_is_pause = False
            if not hasattr(args, 'stanford_pcfg_parse'):
                print "Warning! args does not tell if we are using stanford parsing! Using default... (no)"
                args.stanford_pcfg_parse = False
            if args.stanford_pcfg_parse == False:
                args.pcfgdict = False
            proto = utterance_load.proto_from_txt(
                lab, args.dictionary, args.general_sil_phoneme,
                args.comma_is_pause, args.stanford_pcfg_parse, args.pcfgdict,
                args.pron_reduced, args.lm_score_dir, args.reduction_level)
            self.txtloaded = True
        else:
            raise SiReError("Don't know what to do with intype - {0}".format(
                args.intype))

        #Construct the utt from the proto utt
        self.id = proto["id"]
        self.phonemes = []
        self.syllables = []
        self.words = []
        #We need to know which phoneme features this utterance is created with.
        #A dictionary (if present) takes precedence over a bare featureset.
        if hasattr(args, 'dictionary'):
            self.phoneme_features = args.dictionary.phoneme_feats
        elif hasattr(args, 'phoneme_features'):
            self.phoneme_features = args.phoneme_features
        else:
            raise SiReError(
                "args does not contain either a dictionary or a phoneme featureset!"
            )
        #Running positions of the current syllable/phoneme within the whole utterance.
        s_utt_pos = 0
        p_utt_pos = 0

        for wi, proto_word in enumerate(proto["utt"]):
            #Position of the current phoneme within the current word.
            p_word_pos = 0
            word = Word()
            word.load_from_proto(proto_word, wi, p_utt_pos, s_utt_pos,
                                 len(proto["utt"]), self)
            self.words.append(word)
            for si, proto_syll in enumerate(proto_word["syllables"]):
                syll = Syllable()
                syll.load_from_proto(proto_syll, p_utt_pos, si, s_utt_pos,
                                     word, self)
                self.syllables.append(syll)
                for pi, proto_phone in enumerate(proto_syll["phonemes"]):
                    phoneme = Phoneme()
                    phoneme.load_from_proto(proto_phone, pi, p_utt_pos,
                                            p_word_pos, syll, word, self)
                    self.phonemes.append(phoneme)
                    p_word_pos += 1
                    p_utt_pos += 1
                s_utt_pos += 1
                #Link the phonemes created above back into the syllable/word.
                syll.add_phonemes()
            word.add_phonemes()
            word.add_syllables()

        #If we should use the stanford pcfg parse info
        if hasattr(args, 'stanford_pcfg_parse') and args.stanford_pcfg_parse:
            print "Loading stanford pcfg parse info to utt..."
            #Parse loading needs the word text, which is only loaded already
            #when the input type was txt.
            if args.intype != "txt":
                utterance_load.load_txt(
                    self, os.path.join(args.txtdir, self.id + ".txt"))
            utterance_load.load_stanford_pcfg_parse(self,
                                                    args.pcfgdict[self.id],
                                                    args.comma_is_pause)

        #If we should use the stanford dependency parse info
        if hasattr(args, 'stanford_dependency_parse'
                   ) and args.stanford_dependency_parse:
            print "Loading stanford dependency parse info to utt..."
            if args.intype != "txt" and self.txtloaded == False:
                utterance_load.load_txt(
                    self, os.path.join(args.txtdir, self.id + ".txt"))
            utterance_load.load_stanford_dependency_parse(
                self, args.dependencydict[self.id])

        #If we output a Festival context set we should modify the UTT a bit further.
        #Right now we use the full festival features as standard, but some operations, like corpus analysis, does not rely on this and it is a nuisance to have the text a requirement so this is still just an option.
        if args.festival_features == True:
            #We need to know the words
            if args.intype != "txt" and self.txtloaded == False:
                # print "ID", self.id
                #NOTE: unlike the parse-loading calls above, this passes
                #args.emphasis through to load_txt.
                utterance_load.load_txt(
                    self, os.path.join(args.txtdir, self.id + ".txt"),
                    args.emphasis)
            #If we have a pcfg parse we have a proper POS tag mechanism and they have already been added
            if not args.stanford_pcfg_parse:
                pos.simple_festival_pos_predict(self)
            prosody.simple_festival_accent_predict(self)