def _userModel(self, adapter, cnWordIdx):
        wordObj = adapter[cnWordIdx]

        # def _linearModel(wordSeq, start, stop):
        #     costs = 0.0
        #     for word in wordSeq:
        #         costs += word.durationSec
        #     return costs

        def _assembleUserModelCosts(adapter, cnWordIdx, scoreFcn):
            leftSeg, rightSeg = adapter.getNeighborSegments(cnWordIdx)
            wordObj = adapter[cnWordIdx]

            if leftSeg == rightSeg and leftSeg is not None:
                #TODO
                costs = scoreFcn(len(leftSeg), leftSeg.durationSec)
                # debug("This should not happen! Costs {0}".format(costs))
            elif leftSeg is not None and rightSeg is not None:
                costsLeft = scoreFcn(len(leftSeg), leftSeg.durationSec)
                costsRight = scoreFcn(len(rightSeg), rightSeg.durationSec)
                costsWhole = scoreFcn(len(leftSeg)+len(rightSeg)+1,\
                                      leftSeg.durationSec  + \
                                      rightSeg.durationSec + \
                                      wordObj.durationSec)
                costs = costsWhole - (costsLeft + costsRight )
                # debug("Merge costs {0}".format(costs))
            elif leftSeg is not None:
                costsLeft = scoreFcn(len(leftSeg), leftSeg.durationSec)
                costsWhole = scoreFcn(len(leftSeg)+1, leftSeg.durationSec\
                                      + wordObj.durationSec)
                costs = costsWhole - (costsLeft )
                # debug("Left segment costs {0}".format(costs))
            elif rightSeg is not None:
                costsRight = scoreFcn(len(rightSeg), rightSeg.durationSec)
                costsWhole = scoreFcn(len(rightSeg)+1, rightSeg.durationSec\
                                      + wordObj.durationSec)
                costs = costsWhole - (costsRight )
                # debug("Right segment cost {0}".format(costs))
            else:
                costs = scoreFcn(1, wordObj.durationSec)
                
            return costs

        try:
            # costs = _assembleUserModelCosts(wordObj, self._estimatedModel.score)
            # costs = _assembleUserModelCosts(wordObj, self._scoreFcnMemoizeWrapper)
            costs = _assembleUserModelCosts(adapter, cnWordIdx, self._scoreFcnFast)
        except Exception as e:
            debug("Error at word in utterance {0}".format(wordObj.id))
            debug(e)
            raise
        # costs = _assembleUserModelCosts(wordObj, _linearModel)
        return costs
    def _mainLoop(self, wordController, batchSizeSec):
        """Select words by gain/cost ratio until the accumulated cost reaches the budget.

        Every word returned by ``self._initGains`` that is neither marked,
        transcribed nor ignored is pushed onto a priority queue keyed by its
        ratio.  The loop then repeatedly pops the top entry and recomputes its
        gain, ratio and cost with ``self._computeRatioGain``.  The word is
        selected when the recomputed ratio is still at least
        ``gainQueue.max()``, or when the word has already been re-queued since
        the last selection; otherwise it is pushed back with the fresh ratio.
        After a selection the queue priorities of the direct neighbours in the
        same adapter are recomputed.

        wordController -- forwarded to ``self._initGains``
        batchSizeSec   -- budget; the loop stops once the summed costs of the
                          selected words reach it (must be > 0)

        Returns the tuple ``(candidateList, coveredPhonemes,
        valueCoveredVectors)`` where ``candidateList`` holds the selected
        ``(adapter, cnWordIdx)`` pairs in selection order.

        NOTE(review): another ``_mainLoop`` with the same signature appears
        later in this file; if both live in the same class the later one
        replaces this one -- confirm which is intended.
        NOTE(review): progress logging reads ``self.wordController`` rather
        than the ``wordController`` argument -- confirm they are the same.
        """
        assert(batchSizeSec > 0.0)

        # NOTE(review): not copied, so an in-place ``+=`` below may also change
        # self._valueCoveredVectors if it is a mutable object -- confirm.
        valueCoveredVectors = self._valueCoveredVectors

        # Start from a copy of the previously covered phonemes, if any.
        if self._coveredPhonemes is not None:
            coveredPhonemes = self._coveredPhonemes.copy()
        else:
            coveredPhonemes = Counter()

        log("Computing submodular gains")

        candidateList = []
        gainDict = self._initGains(wordController, coveredPhonemes, valueCoveredVectors)
        gainQueue = FastBucketQueue()
        # Words re-queued since the last selection; see the acceptance test below.
        checkedCnWordSet = set()
        for (adapter, cnWordIdx), ratio in gainDict.iteritems():
            cnWord = adapter[cnWordIdx]
            if not cnWord._marked and not cnWord._transcribed and not cnWord.ignore:
                gainQueue.push((adapter, cnWordIdx), ratio)

        # Float division: plain ``/`` on two counts truncates to 0 under
        # Python 2.  Guard against an empty corpus instead of crashing.
        numMarked = self.wordController.getNumMarkedWords()
        numWords = self.wordController.getNumWords()
        markedFraction = float(numMarked) / numWords if numWords else 0.0
        log('Number of marked words: {0}, -> {1} percent marked'.format(numMarked, markedFraction))

        i = 0
        accumTime = 0.0
        accumTimeFrameDurations = 0.0
        reportAfterSamples = 10000
        while len(gainQueue) > 0 and accumTime < batchSizeSec:

            # The queued ratio may be stale, so it is discarded and recomputed.
            (adapter, cnWordIdx), _staleRatio = gainQueue.pop()
            candidateCnWord = adapter[cnWordIdx]
            candidateGain, candidateRatio, costs = self._computeRatioGain(adapter, cnWordIdx, coveredPhonemes, valueCoveredVectors)

            # NOTE(review): gainQueue.max() is evaluated after the pop; its
            # behaviour on an empty queue is not visible here -- confirm.
            if candidateCnWord in checkedCnWordSet or candidateRatio >= gainQueue.max():
                candidateList.append((adapter, cnWordIdx))
                adapter.assignWordToSegment(cnWordIdx)
                coveredPhonemes += candidateCnWord._tfIdfDict
                valueCoveredVectors += candidateGain
                checkedCnWordSet.clear()
                candidateCnWord._marked = True

                # Plain duration of the selected words (cost-insensitive).
                accumTimeFrameDurations += candidateCnWord.durationSec

                # Cost-sensitive budget (inaccurate because costs can be
                # lowered later through forming segments).
                accumTime += costs
                candidateCnWord.cost = costs
                candidateCnWord.utility = candidateGain
                candidateCnWord.ratio = candidateRatio
                candidateCnWord._score = i

                # Refresh the queue priority of the direct neighbours.
                neighbourPositions = []
                if cnWordIdx > 0:
                    neighbourPositions.append(cnWordIdx - 1)
                if cnWordIdx < adapter.getNumWords() - 1:
                    neighbourPositions.append(cnWordIdx + 1)

                for neighPos in neighbourPositions:
                    neighWord = adapter[neighPos]
                    if neighWord._marked or neighWord.ignore:
                        continue
                    try:
                        neighGain, neighRatio, neighCosts = self._computeRatioGain(adapter, neighPos, coveredPhonemes, valueCoveredVectors)
                        gainQueue.update((adapter, neighPos), neighRatio)
                        neighWord.cost = neighCosts
                        neighWord.utility = neighGain
                        neighWord.ratio = neighRatio
                    except Exception:
                        # Dump the adapter state for diagnosis, then propagate.
                        warn(("Adapter {0} wordpos {1}".format(adapter.cnId, cnWordIdx)))
                        warn(adapter.visualizeWordsChain())
                        warn(adapter.visualizeSegments())
                        raise

                i += 1

                if i % reportAfterSamples == 0:
                    log('Number of marked words: {0}, -> {1} percent marked'.format(i, float(i) / float(self.wordController.getNumWords()) ))

            else:
                # Not the best any more: re-queue with the fresh ratio and
                # remember that this word has been checked.
                gainQueue.push((adapter, cnWordIdx), candidateRatio)
                checkedCnWordSet.add(candidateCnWord)

            if len(gainQueue) <= 10:
                debug("Priority queue has only {0} elements left".format(len(gainQueue)))

        # accumTime is compared against batchSizeSec above, so it is treated as
        # seconds here and converted to hours like the duration sum.
        summary = ("Sampled {0} hours of words "
                   "(annotation cost model roughly predicts {1} hours of annotation)")
        log(summary.format(accumTimeFrameDurations/3600.0, accumTime/3600.0))

        return candidateList, coveredPhonemes, valueCoveredVectors
    def _mainLoop(self, wordController, batchSizeSec):
        """Select words by gain until their summed duration reaches the budget.

        Every word returned by ``self._initGains`` that is neither marked,
        transcribed nor ignored is pushed onto a priority queue keyed by its
        gain.  The loop then repeatedly pops the top entry and recomputes its
        gain with ``self._computeGain``.  The word is selected when the
        recomputed gain is still at least ``gainQueue.max()``, or when the
        word has already been re-queued since the last selection; otherwise it
        is pushed back with the fresh gain.

        wordController -- forwarded to ``self._initGains``
        batchSizeSec   -- budget; the loop stops once the summed
                          ``durationSec`` of the selected words reaches it
                          (must be > 0)

        Returns the tuple ``(candidateList, coveredPhonemes,
        valueCoveredVectors)`` where ``candidateList`` holds the selected
        ``(adapter, cnWordIdx)`` pairs in selection order.

        NOTE(review): another ``_mainLoop`` with the same signature appears
        earlier in this file; if both live in the same class this one replaces
        the earlier one -- confirm which is intended.
        NOTE(review): progress logging reads ``self.wordController`` rather
        than the ``wordController`` argument -- confirm they are the same.
        """
        assert(batchSizeSec > 0.0)

        # NOTE(review): not copied, so an in-place ``+=`` below may also change
        # self._valueCoveredVectors if it is a mutable object -- confirm.
        valueCoveredVectors = self._valueCoveredVectors

        # Start from a copy of the previously covered phonemes, if any.
        if self._coveredPhonemes is not None:
            coveredPhonemes = self._coveredPhonemes.copy()
        else:
            coveredPhonemes = Counter()

        log("Computing submodular gains")
        candidateList = []
        gainDict = self._initGains(wordController, coveredPhonemes, valueCoveredVectors)
        gainQueue = FastBucketQueue()
        # Words re-queued since the last selection; see the acceptance test below.
        checkedCnWordSet = set()
        for (adapter, cnWordIdx), gain in gainDict.iteritems():
            cnWord = adapter[cnWordIdx]
            if not cnWord._marked and not cnWord._transcribed and not cnWord.ignore:
                gainQueue.push((adapter, cnWordIdx), gain)

        # Float division: plain ``/`` on two counts truncates to 0 under
        # Python 2.  Guard against an empty corpus instead of raising
        # ZeroDivisionError from a log statement.
        numMarked = self.wordController.getNumMarkedWords()
        numWords = self.wordController.getNumWords()
        markedFraction = float(numMarked) / numWords if numWords else 0.0
        log('Number of marked words: {0}, -> {1} percent marked'.format(numMarked, markedFraction))

        i = 0
        accumTime = 0.0
        reportAfterSamples = 10000
        while len(gainQueue) > 0 and accumTime < batchSizeSec:

            # The queued gain may be stale, so it is discarded and recomputed.
            (adapter, cnWordIdx), _staleGain = gainQueue.pop()
            candidateCnWord = adapter[cnWordIdx]
            candidateGain = self._computeGain(adapter, cnWordIdx, coveredPhonemes, valueCoveredVectors)

            # NOTE(review): gainQueue.max() is evaluated after the pop; its
            # behaviour on an empty queue is not visible here -- confirm.
            if candidateCnWord in checkedCnWordSet or candidateGain >= gainQueue.max():
                candidateList.append((adapter, cnWordIdx))
                adapter.assignWordToSegment(cnWordIdx)
                coveredPhonemes += candidateCnWord._tfIdfDict
                valueCoveredVectors += candidateGain
                checkedCnWordSet.clear()
                candidateCnWord._marked = True
                accumTime += candidateCnWord.durationSec
                candidateCnWord.utility = candidateGain

                i += 1

                if i % reportAfterSamples == 0:
                    log('Number of marked words: {0}, -> {1} percent marked'.format(i, float(i) / float(self.wordController.getNumWords()) ))

            else:
                # Not the best any more: re-queue with the fresh gain and
                # remember that this word has been checked.
                gainQueue.push((adapter, cnWordIdx), candidateGain)
                checkedCnWordSet.add(candidateCnWord)

            if len(gainQueue) <= 10:
                debug("Priority queue has only {0} elements left".format(len(gainQueue)))

        log("Sampled {0} hours of words".format(accumTime/3600.0))

        return candidateList, coveredPhonemes, valueCoveredVectors