def _userModel(self, adapter, cnWordIdx):
    """Estimate the additional annotation cost of selecting one word.

    The cost is obtained from ``self._scoreFcnFast(numWords, durationSec)``
    and depends on the segments that ``adapter.getNeighborSegments`` reports
    on the left and right of the word:

    * both sides report the same (non-None) segment: the score of that
      segment alone;
    * otherwise: the score of one segment made of the word plus every
      existing neighbor segment, minus the scores of those neighbor
      segments taken separately.  Without any neighbor segment this is
      simply the score of the single word.

    Args:
        adapter: Indexable by word position.  Must provide
            ``getNeighborSegments(cnWordIdx)`` returning a
            ``(leftSeg, rightSeg)`` pair whose entries are either ``None``
            or support ``len()`` and expose ``durationSec``.
        cnWordIdx: Position of the word inside ``adapter``.

    Returns:
        The cost value computed from the score function as described above.

    Raises:
        Exception: Any error raised while computing the cost is reported
            through ``debug`` and then re-raised unchanged.
    """
    wordObj = adapter[cnWordIdx]
    try:
        scoreFcn = self._scoreFcnFast
        leftSeg, rightSeg = adapter.getNeighborSegments(cnWordIdx)

        # Test for None first so that a segment's __eq__ is never handed None.
        if leftSeg is not None and leftSeg == rightSeg:
            # TODO: both sides report the very same segment; for now only
            # that segment is scored and the word itself is not added.
            return scoreFcn(len(leftSeg), leftSeg.durationSec)

        neighborSegs = [seg for seg in (leftSeg, rightSeg) if seg is not None]

        # Score the existing segments first, then the merged one, which is
        # the order the score function has always been called in.
        separateCosts = [scoreFcn(len(seg), seg.durationSec)
                         for seg in neighborSegs]
        mergedNumWords = sum(len(seg) for seg in neighborSegs) + 1
        mergedDurationSec = (sum(seg.durationSec for seg in neighborSegs)
                             + wordObj.durationSec)
        return scoreFcn(mergedNumWords, mergedDurationSec) - sum(separateCosts)
    except Exception as e:
        debug("Error at word in utterance {0}".format(wordObj.id))
        debug(e)
        raise
def _mainLoop(self, wordController, batchSizeSec):
    """Select words by gain/cost ratio until the predicted cost budget is used.

    Every word that is neither marked, transcribed nor ignored is pushed
    into a ``FastBucketQueue`` with the ratio from ``self._initGains``.
    The loop then repeatedly pops the top entry and recomputes its gain,
    ratio and cost with ``self._computeRatioGain``.  The entry is accepted
    when the fresh ratio is still at least ``gainQueue.max()``, or when the
    word has already been refused once since the last acceptance;
    otherwise it is pushed back with the fresh ratio.

    Accepting a word assigns it to a segment on its adapter, adds its
    ``_tfIdfDict`` to the covered phonemes, sets ``_marked``, ``cost``,
    ``utility``, ``ratio`` and ``_score`` on it, and refreshes the queue
    entries of its direct, still selectable neighbors.

    Args:
        wordController: Passed through to ``self._initGains``.
        batchSizeSec: Budget, must be > 0.  Selection stops once the
            accumulated cost returned by ``_computeRatioGain`` reaches it
            (or the queue runs empty).

    Returns:
        Tuple ``(candidateList, coveredPhonemes, valueCoveredVectors)``
        where ``candidateList`` holds the accepted ``(adapter, cnWordIdx)``
        pairs in selection order.
    """
    assert batchSizeSec > 0.0

    # NOTE(review): this is the object stored on self, not a copy.  If it is
    # a mutable array, the in-place "+=" below also modifies
    # self._valueCoveredVectors -- confirm that this is intended.
    valueCoveredVectors = self._valueCoveredVectors
    if self._coveredPhonemes is not None:
        coveredPhonemes = self._coveredPhonemes.copy()
    else:
        coveredPhonemes = Counter()

    def fractionOfAllWords(count):
        # Explicit float division (plain "/" floors two ints on Python 2)
        # and no ZeroDivisionError when the controller holds no words.
        # NOTE(review): the log text says "percent" but this has always
        # been a 0..1 fraction.
        total = self.wordController.getNumWords()
        return float(count) / float(total) if total else 0.0

    log("Computing submodular gains")
    candidateList = []
    gainDict = self._initGains(wordController, coveredPhonemes,
                               valueCoveredVectors)
    gainQueue = FastBucketQueue()
    checkedCnWordSet = set()
    for (adapter, cnWordIdx), ratio in gainDict.iteritems():
        cnWord = adapter[cnWordIdx]
        if not (cnWord._marked or cnWord._transcribed or cnWord.ignore):
            gainQueue.push((adapter, cnWordIdx), ratio)

    i = 0
    accumTime = 0.0                # predicted annotation cost
    accumTimeFrameDurations = 0.0  # plain audio duration of selected words

    # NOTE(review): the statistics come from self.wordController, not from
    # the wordController argument -- presumably the same object; verify.
    numMarked = self.wordController.getNumMarkedWords()
    log('Number of marked words: {0}, -> {1} percent marked'.format(
        numMarked, fractionOfAllWords(numMarked)))

    reportAfterSamples = 10000
    while len(gainQueue) > 0 and accumTime < batchSizeSec:
        # The ratio stored in the queue may be stale, so it is recomputed
        # against the current coverage before deciding.
        (adapter, cnWordIdx), _ = gainQueue.pop()
        candidateCnWord = adapter[cnWordIdx]
        candidateGain, candidateRatio, costs = self._computeRatioGain(
            adapter, cnWordIdx, coveredPhonemes, valueCoveredVectors)

        # NOTE(review): if the popped entry was the last one the queue is
        # empty here; confirm FastBucketQueue.max() copes with that.
        if (candidateCnWord in checkedCnWordSet
                or candidateRatio >= gainQueue.max()):
            candidateList.append((adapter, cnWordIdx))
            wordPos = cnWordIdx
            adapter.assignWordToSegment(wordPos)
            coveredPhonemes += candidateCnWord._tfIdfDict
            valueCoveredVectors += candidateGain
            checkedCnWordSet.clear()
            candidateCnWord._marked = True

            accumTimeFrameDurations += candidateCnWord.durationSec
            # Cost-sensitive budget; inaccurate because costs can be
            # lowered later through forming segments.
            accumTime += costs
            candidateCnWord.cost = costs
            candidateCnWord.utility = candidateGain
            candidateCnWord.ratio = candidateRatio
            candidateCnWord._score = i

            # Refresh the direct neighbors of the selected word.
            neighborPositions = []
            if wordPos > 0:
                neighborPositions.append(wordPos - 1)
            if wordPos < adapter.getNumWords() - 1:
                neighborPositions.append(wordPos + 1)

            for neighPos in neighborPositions:
                neighWord = adapter[neighPos]
                if neighWord._marked or neighWord.ignore:
                    continue
                try:
                    neighGain, neighRatio, neighCosts = \
                        self._computeRatioGain(adapter, neighPos,
                                               coveredPhonemes,
                                               valueCoveredVectors)
                    gainQueue.update((adapter, neighPos), neighRatio)
                    neighWord.cost = neighCosts
                    neighWord.utility = neighGain
                    neighWord.ratio = neighRatio
                except Exception:
                    # Dump the adapter state for diagnosis, then propagate.
                    warn(("Adapter {0} wordpos {1}".format(adapter.cnId,
                                                           wordPos)))
                    warn(adapter.visualizeWordsChain())
                    warn(adapter.visualizeSegments())
                    raise

            i += 1
            if i % reportAfterSamples == 0:
                log('Number of marked words: {0}, -> {1} percent marked'
                    .format(i, fractionOfAllWords(i)))
        else:
            # Refused: re-queue with the fresh ratio and remember the word
            # so it is accepted if it comes up again before the next
            # acceptance.
            gainQueue.push((adapter, cnWordIdx), candidateRatio)
            checkedCnWordSet.add(candidateCnWord)

        if len(gainQueue) <= 10:
            debug("Priority queue has only {0} elements left".format(
                len(gainQueue)))

    # accumTime is compared against batchSizeSec above, i.e. it is in
    # seconds; convert both figures to hours and print each in its own field.
    log("Sampled {0} hours of words "
        "(annotation cost model roughly predicts {1} hours of annotation)"
        .format(accumTimeFrameDurations / 3600.0, accumTime / 3600.0))
    return candidateList, coveredPhonemes, valueCoveredVectors
def _mainLoop(self, wordController, batchSizeSec):
    """Select words by gain alone until enough audio duration is collected.

    Every word that is neither marked, transcribed nor ignored is pushed
    into a ``FastBucketQueue`` with the gain from ``self._initGains``.
    The loop then repeatedly pops the top entry and recomputes its gain
    with ``self._computeGain``.  The entry is accepted when the fresh gain
    is still at least ``gainQueue.max()``, or when the word has already
    been refused once since the last acceptance; otherwise it is pushed
    back with the fresh gain.

    Accepting a word assigns it to a segment on its adapter, adds its
    ``_tfIdfDict`` to the covered phonemes and sets ``_marked`` and
    ``utility`` on it.  Unlike a cost-aware selection, neighbor entries in
    the queue are not refreshed here.

    Args:
        wordController: Passed through to ``self._initGains``.
        batchSizeSec: Budget, must be > 0.  Selection stops once the summed
            ``durationSec`` of the accepted words reaches it (or the queue
            runs empty).

    Returns:
        Tuple ``(candidateList, coveredPhonemes, valueCoveredVectors)``
        where ``candidateList`` holds the accepted ``(adapter, cnWordIdx)``
        pairs in selection order.
    """
    assert batchSizeSec > 0.0

    # NOTE(review): this is the object stored on self, not a copy.  If it is
    # a mutable array, the in-place "+=" below also modifies
    # self._valueCoveredVectors -- confirm that this is intended.
    valueCoveredVectors = self._valueCoveredVectors
    if self._coveredPhonemes is not None:
        coveredPhonemes = self._coveredPhonemes.copy()
    else:
        coveredPhonemes = Counter()

    def fractionOfAllWords(count):
        # Explicit float division (plain "/" floors two ints on Python 2)
        # and no ZeroDivisionError when the controller holds no words.
        # NOTE(review): the log text says "percent" but this has always
        # been a 0..1 fraction.
        total = self.wordController.getNumWords()
        return float(count) / float(total) if total else 0.0

    log("Computing submodular gains")
    candidateList = []
    gainDict = self._initGains(wordController, coveredPhonemes,
                               valueCoveredVectors)
    gainQueue = FastBucketQueue()
    checkedCnWordSet = set()
    for (adapter, cnWordIdx), gain in gainDict.iteritems():
        cnWord = adapter[cnWordIdx]
        if not (cnWord._marked or cnWord._transcribed or cnWord.ignore):
            gainQueue.push((adapter, cnWordIdx), gain)

    i = 0
    accumTime = 0.0  # summed durationSec of the accepted words

    # NOTE(review): the statistics come from self.wordController, not from
    # the wordController argument -- presumably the same object; verify.
    numMarked = self.wordController.getNumMarkedWords()
    log('Number of marked words: {0}, -> {1} percent marked'.format(
        numMarked, fractionOfAllWords(numMarked)))

    reportAfterSamples = 10000
    while len(gainQueue) > 0 and accumTime < batchSizeSec:
        # The gain stored in the queue may be stale, so it is discarded and
        # recomputed against the current coverage.
        (adapter, cnWordIdx), _ = gainQueue.pop()
        candidateCnWord = adapter[cnWordIdx]
        candidateGain = self._computeGain(adapter, cnWordIdx,
                                          coveredPhonemes,
                                          valueCoveredVectors)

        # NOTE(review): if the popped entry was the last one the queue is
        # empty here; confirm FastBucketQueue.max() copes with that.
        if (candidateCnWord in checkedCnWordSet
                or candidateGain >= gainQueue.max()):
            candidateList.append((adapter, cnWordIdx))
            adapter.assignWordToSegment(cnWordIdx)
            coveredPhonemes += candidateCnWord._tfIdfDict
            valueCoveredVectors += candidateGain
            checkedCnWordSet.clear()
            candidateCnWord._marked = True
            accumTime += candidateCnWord.durationSec
            candidateCnWord.utility = candidateGain

            i += 1
            if i % reportAfterSamples == 0:
                log('Number of marked words: {0}, -> {1} percent marked'
                    .format(i, fractionOfAllWords(i)))
        else:
            # Refused: re-queue with the fresh gain and remember the word
            # so it is accepted if it comes up again before the next
            # acceptance.
            gainQueue.push((adapter, cnWordIdx), candidateGain)
            checkedCnWordSet.add(candidateCnWord)

        if len(gainQueue) <= 10:
            debug("Priority queue has only {0} elements left".format(
                len(gainQueue)))

    log("Sampled {0} hours of words".format(accumTime / 3600.0))
    return candidateList, coveredPhonemes, valueCoveredVectors