コード例 #1
0
    def createCliqueFinder(self, text, *args):
        """Return a ``FORANative.CliqueFinder`` over a freshly prepared context.

        ``text`` and ``*args`` are forwarded unchanged to
        ``self.createCliqueFinderContext``, which yields a
        ``(context, argument values, vdm)`` triple. Before the finder is
        built, the vdm is asked to unload everything it can and the context
        is resumed.
        """
        # The argument values are not needed here; only the context and the
        # vdm are used.
        finderContext, _unusedArgValues, dataManager = \
            self.createCliqueFinderContext(text, *args)

        dataManager.unloadAllPossible()
        finderContext.resume()

        finder = FORANative.CliqueFinder(finderContext)
        return finder
コード例 #2
0
    def disable_big_lm(self):
        rows = 20
        cols = 20
        passes = 3

        def shouldDrop(row, col):
            return col > 1 or row < 20

        randomSeed = 1

        pageCounts = {}

        vecText = """
            Vector.range(%s, fun(colIx) {
                Vector.range(%s, { Vector.range(1000).paged}).sum()
                })
            """ % (cols, rows)

        context, argIVs, vdm = self.createCliqueFinderContext(
            """fun(v) { 
                    math.regression.linear.computeXTX(dataframe.DataFrame(v), splitLimit:100)
                    }""", vecText)

        vecOfVecs = argIVs[0]
        droppedPages = set()

        pageCoordinates = {}

        coordsToPage = {}

        for col in range(len(vecOfVecs)):
            vec = vecOfVecs[col]

            for row, vdid in enumerate(
                    vec.getVectorDataIdsForSlice(0, len(vec), vdm)):
                pageCoordinates[vdid.page] = (row, col)
                coordsToPage[(row, col)] = vdid.page
                rows = max(rows, row + 1)

        for col in range(cols):
            vec = vecOfVecs[col]
            for row, vdid in enumerate(
                    vec.getVectorDataIdsForSlice(0, len(vec), vdm)):
                if shouldDrop(row, col):
                    vdm.dropPageWithoutWritingToDisk(vdid.page)
                    droppedPages.add(vdid.page)

        print "dropped ", len(droppedPages), " pages"

        context.resume()

        for passIx in range(3):

            def inc(d, e):
                if e not in d:
                    d[e] = 0
                d[e] += 1

            for index in range(passes):
                t0 = time.time()
                added = 0
                while time.time() - t0 < 2.0:
                    randomSeed += 1

                    cliqueFinder = FORANative.CliqueFinder(context)

                    task = cliqueFinder.getRootTask()

                    cliqueFinder.searchFromTopOfTreeReturningCliquesCreated(
                        time.time() + 2.0, randomSeed)

                    for n in task.cliques():
                        added += 1
                        for p in n:
                            inc(pageCounts, p)

                print "ix: %s. cliques: %s" % (index, added)

            pageCountsSorted = sorted(list(pageCounts.values()))

            c1 = pageCountsSorted[len(pageCountsSorted) / 4]
            c2 = pageCountsSorted[len(pageCountsSorted) * 2 / 4]
            c3 = pageCountsSorted[len(pageCountsSorted) * 3 / 4]

            if c2 <= c1:
                c2 = c1 + 1
            if c3 <= c2:
                c3 = c2 + 1

            print "rows: ", rows
            print "cols: ", cols
            print "passes: ", passes
            print
            for row in range(rows):
                print "row %3s:        " % row,
                for col in range(cols):
                    if (row, col) in coordsToPage:
                        p = coordsToPage[(row, col)]
                        if p not in pageCounts or pageCounts[p] == 0:
                            sym = ' '
                        elif pageCounts[p] < c1:
                            sym = '.'
                        elif pageCounts[p] < c2:
                            sym = '-'
                        elif pageCounts[p] < c3:
                            sym = '*'
                        else:
                            sym = '#'
                    else:
                        sym = '?'

                    print sym,
                print
            print
            print
            print
            print
            print "Total pages mentioned: ", len(pageCounts), " of ", len(
                pageCoordinates)

        self.assertTrue(len(pageCounts) * 2 > len(pageCoordinates))