Esempio n. 1
0
   def __init__(self, data_key, filename, type=None, allophone_labeling=None):
       """
       Open a Sprint cache archive and work out how its entries are laid out.

       :param str data_key: e.g. "data" or "classes"
       :param str filename: to Sprint cache archive
       :param str|None type: "feat" or "align"; derived from ``data_key`` and
         ``allophone_labeling`` when left empty
       :param dict[str] allophone_labeling: kwargs for :class:`AllophoneLabeling`
       """
       self.data_key = data_key
       from SprintCache import open_file_archive
       archive = open_file_archive(filename)
       self.sprint_cache = archive
       if not type:
           # No explicit type: "data" means features, "classes" means
           # alignments; for any other key an alignment is assumed only when
           # an allophone labeling was supplied.
           if data_key == "data":
               type = "feat"
           elif data_key == "classes" or allophone_labeling:
               type = "align"
           else:
               type = "feat"
       assert type in ("feat", "align")
       self.type = type
       self.allophone_labeling = None
       if not allophone_labeling:
           assert type != "align", "need allophone_labeling for 'align' type"
       else:
           from SprintCache import AllophoneLabeling
           labeling = AllophoneLabeling(**allophone_labeling)
           self.allophone_labeling = labeling
           archive.setAllophones(labeling.allophone_file)
       # Archive entries ending in ".attribs" are not content entries.
       keys = []
       for entry in archive.file_list():
           if entry.endswith(".attribs"):
               continue
           keys.append(entry)
       self.content_keys = keys
       if type == "feat":
           self.num_labels = self._get_feature_dim()
           self.num_dims = 2
           self.dtype = "float32"
       elif type == "align":
           label_count = self.allophone_labeling.num_labels
           self.num_labels = label_count
           # Pick the narrowest signed integer dtype that can hold every label.
           if label_count >= 2**15:
               assert label_count < 2**31
               self.dtype = "int32"
           elif label_count >= 2**7:
               self.dtype = "int16"
           else:
               self.dtype = "int8"
           self.num_dims = 1
           if self.allophone_labeling.state_tying_by_allo_state_idx:
               self.type = "align_raw"
       else:
           assert False
Esempio n. 2
0
 def __init__(self, data_key, filename, data_type=None, allophone_labeling=None):
   """
   Open a Sprint cache archive and determine the layout of its entries.

   :param str data_key: e.g. "data" or "classes"
   :param str filename: to Sprint cache archive
   :param str|None data_type: "feat" or "align"; derived from ``data_key`` and
     ``allophone_labeling`` when left empty
   :param dict[str] allophone_labeling: kwargs for :class:`AllophoneLabeling`
   """
   self.data_key = data_key
   from SprintCache import open_file_archive
   self.sprint_cache = open_file_archive(filename)
   if not data_type:
     if data_key == "data":
       data_type = "feat"
     else:
       # "classes" always means alignments; any other key falls back to
       # alignments only when an allophone labeling was given.
       wants_alignment = data_key == "classes" or allophone_labeling
       data_type = "align" if wants_alignment else "feat"
   assert data_type in ("feat", "align")
   self.type = data_type
   self.allophone_labeling = None
   if not allophone_labeling:
     assert data_type != "align", "need allophone_labeling for 'align' type"
   else:
     from SprintCache import AllophoneLabeling
     labeling = AllophoneLabeling(**allophone_labeling)
     self.allophone_labeling = labeling
     self.sprint_cache.set_allophones(labeling.allophone_file)
   # Archive entries ending in ".attribs" are not content entries.
   archive_entries = self.sprint_cache.file_list()
   self.content_keys = [name for name in archive_entries if not name.endswith(".attribs")]
   if data_type == "feat":
     self.num_labels = self._get_feature_dim()
     self.num_dims = 2
     self.dtype = "float32"
   elif data_type == "align":
     self.num_labels = self.allophone_labeling.num_labels
     # Use the narrowest signed integer dtype that can hold every label.
     for bits, dtype_name in ((7, "int8"), (15, "int16")):
       if self.num_labels < 2 ** bits:
         self.dtype = dtype_name
         break
     else:
       assert self.num_labels < 2 ** 31
       self.dtype = "int32"
     self.num_dims = 1
     if self.allophone_labeling.state_tying_by_allo_state_idx:
       self.type = "align_raw"
   else:
     assert False
def _globInDirectory(directory, pattern):
    """Return the glob matches of `pattern` evaluated from inside `directory`.

    The previous working directory is restored afterwards, so relative
    directories keep resolving against the same base on later calls.
    """
    previousDir = os.getcwd()
    os.chdir(directory)
    try:
        return glob.glob(pattern)
    finally:
        os.chdir(previousDir)


def generateClassLabel(args):
    """Write one label line per image frame, pairing images with alignments.

    For every segment of the first alignment cache, the alignment of each
    cache is read, each alignment item is mapped to a mixture index through
    the lexicon that belongs to that cache, and the indices are written to
    ``args.output`` next to the path of the corresponding image.

    Attributes read from ``args``:

    * ``alignmentcache``: comma-separated alignment cache files
    * ``allophonefile``: optional comma-separated allophone files, applied
      to the caches by position
    * ``lexiconfile`` / ``states``: comma-separated lexicon files and the
      integer passed to ``LexiconHandler`` for each of them
    * ``images``: prefix prepended to each segment name to get its directory
    * ``imageExtension``: glob pattern evaluated inside that directory
    * ``useOne``: when false, the last two characters of the directory path
      are dropped
    * ``outputC3Dstyle`` / ``C3DclipLength``: select the C3D line format
    * ``ignoreDifferentLengths``: warn instead of aborting when the number of
      images and alignment items differ
    * ``output``: file the lines are written to
    * ``verbose``: print details instead of drawing a progress bar

    Raises SystemExit (status 1) when the number of images differs from the
    number of alignment items and ``args.ignoreDifferentLengths`` is not set.
    """
    # Python 2 only: make utf-8 the implicit str/unicode codec.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Open the alignment caches.
    acs = []
    for ac in args.alignmentcache.split(','):
        if args.verbose:
            print(ac)
        acs.append(open_file_archive(ac))

    # Allophone files are matched to the alignment caches by position.
    if args.allophonefile:
        for index, al in enumerate(args.allophonefile.split(',')):
            acs[index].setAllophones(al)

    # One lexicon per lexicon file; its mixture indices become the class
    # labels that are written out.
    states = [int(state) for state in args.states.split(',')]
    lexicons = []
    for index, lex in enumerate(args.lexiconfile.split(',')):
        parser = make_parser()
        lexicon = LexiconHandler(states[index])
        parser.setContentHandler(lexicon)
        parser.parse(lex)
        lexicons.append(lexicon)

    # Keys are expected to be the same in all caches, so only the first cache
    # is consulted; entries containing ".attribs" are skipped.
    dataPathList = [key for key in acs[0].file_list()
                    if ".attribs" not in key]

    progBar = None
    if args.verbose:
        print("number of files to processed:  %d" % len(dataPathList))
    else:
        progBar = progressbar.progressBar(0, len(dataPathList), 77)
        progBar.draw()

    # The context manager closes the output file on every exit path,
    # including the sys.exit() below.
    with open(args.output, "wb") as outFile:
        for i, dataPath in enumerate(dataPathList):
            if args.verbose:
                print("current processed segment is " + str(i) +
                      " th data " + dataPath)
            else:
                progBar.updateAmount(i + 1)
                progBar.draw()

            # Per cache: one (allophone name, align[2]) pair per alignment
            # item. The name is the text before the first '{'.
            # NOTE(review): align[1] looks like the allophone index and
            # align[2] the state -- confirm against the cache reader.
            mixLists = []
            for cache in acs:
                acread = cache.read(dataPath, "align")
                if args.verbose:
                    for align in acread:
                        print("%s  -->  %s" %
                              (cache.showAllophone(align[1]), align))
                mixLists.append([
                    (cache.showAllophone(align[1]).split('{')[0], align[2])
                    for align in acread
                ])

            # Per alignment item: the mixture index from every lexicon.
            lexMixLists = []
            for frameIndex in range(len(mixLists[0])):
                frameLabels = []
                for cacheIndex, mixList in enumerate(mixLists):
                    allophoneName, state = mixList[frameIndex]
                    mixtures = lexicons[cacheIndex].getMixturesByPhon(
                        allophoneName)
                    frameLabels.append(str(mixtures[state]))
                lexMixLists.append(frameLabels)

            newPath = "".join([args.images, dataPath])
            # Without useOne the last two characters are dropped;
            # presumably a trailing "/1" of the segment name -- confirm.
            if not args.useOne:
                newPath = newPath[:-2]

            matches = _globInDirectory(newPath, args.imageExtension)
            if args.outputC3Dstyle:
                # C3D lines carry no file name, only the number of images
                # matters.
                imageList = [''] * len(matches)
            else:
                imageList = sorted(matches)

            if len(imageList) != len(lexMixLists):
                mismatch = "%s number of images %d != %d alignment count " % (
                    dataPath, len(imageList), len(lexMixLists))
                if not args.ignoreDifferentLengths:
                    print("error: " + mismatch)
                    # Non-zero status so that callers can detect the failure.
                    sys.exit(1)
                print("warning: " + mismatch)

            # zip() stops at the shorter list, so only frames that have both
            # an image and an alignment item are written.
            if args.outputC3Dstyle:
                # The start frame stays at 1 for the first half clip and stops
                # increasing for the last half clip, giving one output line
                # per input frame.
                halfClip = args.C3DclipLength / 2
                c3dStartFrame = 1
                for imageIndex, (imageName, labels) in enumerate(
                        zip(imageList, lexMixLists)):
                    if halfClip <= imageIndex < len(lexMixLists) - halfClip:
                        c3dStartFrame += 1
                    outFile.write(newPath + '/' + str(imageName) + ' ' +
                                  str(c3dStartFrame) + ' ' +
                                  ','.join(labels) + '\n')
            else:
                outFile.write(''.join(
                    newPath + '/' + str(imageName) + " " +
                    ','.join(labels) + '\n'
                    for imageName, labels in zip(imageList, lexMixLists)))