Beispiel #1
0
def process(phoneticFile,labelFile):
    """Write one formatted entry to ``oto`` for every phonetic in a file.

    The first line of ``phoneticFile`` is read as UTF-8, split on single
    spaces, and every stripped item is wrapped in ``Ipa``.  The rows of
    ``labelFile`` are loaded through ``LabelFile`` and passed through
    ``adjustLabel``.  For each phonetic a ``Record`` reads from the label
    list starting at the running offset, and one line built from the wav
    name plus ``record.data`` is written to ``oto``.

    Args:
        phoneticFile: Path of the UTF-8 text file whose first line holds the
            space-separated phonetics.
        labelFile: Path of the label file.  The wav name written to each
            entry is this path with ``"-label.txt"`` replaced by ``".wav"``.

    NOTE(review): ``oto``, ``Ipa``, ``LabelFile``, ``adjustLabel`` and
    ``Record`` are not defined in this block; ``oto`` is presumably a
    module-level object with a ``write`` method -- confirm in the caller.
    """
    # Named ``row_format`` (not ``format``) so the builtin is not shadowed.
    # One string for the wav name, one for the first record field, then five
    # integers -- presumably record.data always holds six items; TODO confirm.
    row_format = "%s=%s,%d,%d,%d,%d,%d\n"
    wav = labelFile.replace("-label.txt",".wav")

    print("Reading %s..." % phoneticFile)
    # The context manager closes the handle as soon as the single line has
    # been read; previously it was left open and its name was rebound.
    with codecs.open(phoneticFile,"r","utf-8") as phonetic_stream:
        line = phonetic_stream.readline()
    phonetics = [ Ipa(item.strip()) for item in line.split(" ") ]

    print("Reading %s..." % labelFile)
    label_source = LabelFile()
    label_source.open(labelFile)
    labels = [adjustLabel(l) for l in label_source]

    # Offset into ``labels`` handed to the next record; advanced by len(p)
    # after every phonetic.
    i = 0
    lastSymbol = u""

    for p in phonetics:
        print("Processing %s..." % unicode(p))
        record = Record(p,lastSymbol)
        record.read(labels,i)
        i = i + len(p)

        t = [wav,] + record.data

        # The next record is constructed with the last symbol of this one.
        lastSymbol = p.last()
        oto.write(row_format % tuple(t))
Beispiel #2
0
class Dataset:
    """Utility that manages the set of files associated with one audio file.

    The name of a dataset is the same as the audio file.  ``open()`` reads
    three files derived from that name:

    * ``<name>.txt``          -- first line, space-separated UTF-8 words
    * ``<name>-label.txt``    -- loaded through ``LabelFile``
    * ``<name>-sampling.csv`` -- loaded through ``SamplingFile``
    """

    def __init__(self,name):
        """Remember the dataset name; no file is touched until ``open()``."""
        self.name = name

        # A list of words (each formed by phonetics)
        self.charList = []
        self._phoneticList = []

        self._labelFile = None
        # Declared here as well so the attribute exists before open() runs.
        self._samplingFile = None

    def open(self):
        """Load the word list, the label file and the sampling file.

        Afterwards ``charList`` holds the decoded words of the first line of
        ``<name>.txt`` and the phonetic list has one ``Sample`` appended per
        label row.
        """
        # The context manager closes the text file once its first line has
        # been read; previously the handle was never closed.
        with open(self.name+".txt","r") as charFile:
            line = charFile.readline()
        # NOTE(review): the line is not stripped, so the last word keeps any
        # trailing newline -- confirm whether callers rely on that.
        self.charList = [unicode(item,"utf-8") for item in line.split(" ")]

        self._labelFile = LabelFile()
        self._labelFile.open(self.name + "-label.txt")

        self._samplingFile = SamplingFile()
        self._samplingFile.open(self.name + "-sampling.csv")

        self._createPhoneticList()

    def phoneticList(self):
        """Return the list of ``Sample`` objects built by ``open()``."""
        return self._phoneticList

    def _createPhoneticList(self):
        """Append one ``Sample`` per label row to the phonetic list.

        Each sample takes its timestamp and simplified phonetic symbol from
        the label row, and its measurements from the sampling record found
        by searching the sampling file with the row's first field as a float.
        """
        for row in self._labelFile:
            sample = Sample()
            sample.timestamp = row[0]
            sample.phonetic = Ipa.simplifySymbol(row[2])

            record = self._samplingFile.search(float(row[0]))
            sample.zcr = float(record["ZCR"])
            sample.variance = float(record["Spectrum Variance"])
            sample.rms = float(record["RMS"])
            # NOTE(review): the attribute is ``str`` while the column is
            # "STE" -- possibly a typo for ``ste``; kept because readers of
            # Sample are outside this block.
            sample.str = float(record["STE"])

            self._phoneticList.append(sample)
Beispiel #3
0
 def open(self):
     """Load the word list, the label file and the sampling file.

     Reads the first line of ``<name>.txt`` and stores its space-separated,
     UTF-8 decoded items in ``charList``; opens ``<name>-label.txt`` through
     ``LabelFile`` and ``<name>-sampling.csv`` through ``SamplingFile``; then
     calls ``_createPhoneticList()``.
     """
     # The context manager closes the text file once its first line has
     # been read; previously the handle was never closed.
     with open(self.name+".txt","r") as charFile:
         line = charFile.readline()
     # NOTE(review): the line is not stripped, so the last item keeps any
     # trailing newline -- confirm whether callers rely on that.
     self.charList = [unicode(item,"utf-8") for item in line.split(" ")]

     self._labelFile = LabelFile()
     self._labelFile.open(self.name + "-label.txt")

     self._samplingFile = SamplingFile()
     self._samplingFile.open(self.name + "-sampling.csv")

     self._createPhoneticList()
Beispiel #4
0
# Reads "<target>-sampling.csv" and "<target>.txt", runs the filter over the
# phonetics, and writes the resulting label rows to "<target>-label.txt".
# NOTE(review): ``target``, ``table`` and ``samplingFile`` are not defined in
# this excerpt -- presumably set up earlier in the script; confirm.
filename = target + "-sampling.csv"
print("Reading %s..." % filename)
samplingFile.open(filename)

filename = target + ".txt"
print("Reading %s..." % filename)

# Only the first line is used; the context manager closes the handle right
# after reading it (previously the file was left open).
with codecs.open(filename,"r","utf-8") as f:
    line = f.readline()
phonetics = [ Ipa(item.strip()) for item in line.split(" ") ]

# NOTE(review): this module-level name shadows the builtin ``filter``; it is
# kept because code outside this excerpt may refer to it.
filter = Filter()
filter.process(table,samplingFile,phonetics)

labelFile = LabelFile()


for p in phonetics:
    try:
        labels = p.toLabel()
        for row in labels:
            labelFile.append(row)
    except IndexError:
        # Stop at the first phonetic that raises IndexError; rows appended
        # so far are kept and still saved below.
        print(u"[Error] The detection of %s is incomplete. The rest of phonentic will be skipped in output" % p)
        break

filename = target + "-label.txt"
print("Writing to %s..." % filename)

labelFile.save(filename)