Example #1
0
def cmd_showTotalDagSizes(argv):
    argvidx = 0
    cmdname = argv[argvidx]
    argvidx += 1
    assert 'cmd_' + cmdname == inspect.stack()[0][3]

    opts, args = getopt.getopt(argv[argvidx:], '', [])

    ## parse options
    for o, a in opts:
        pass

    ###

    dagSize_stretchToTotal = {}
    for stretch in stretches:
        dagSize_stretchToTotal[stretch] = 0
        pass

    dirpaths = args

    for dirpath in dirpaths:
        filenames = os.listdir(dirpath)
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)

            pr = utils.unpickleStuff(filepath)

            for stretch in stretches:
                dagSize_stretchToTotal[stretch] += sum(
                    size * count for (size, count)
                    in pr.dagSize_stretchToCounts[stretch].items())
                pass
            pass
        pass

    for stretch in stretches:
        print 'stretch', stretch, ': ', dagSize_stretchToTotal[stretch]
        pass
    return
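
# Every command in these examples loads its input via utils.unpickleStuff.
# The utils module itself is not part of this listing, so the following is
# only a minimal sketch of what the helper is assumed to do: return the one
# object pickled into the given file.

import cPickle

def unpickleStuff(filepath):
    fil = open(filepath, 'rb')
    try:
        # the partial-result / evalResult3 objects consumed by the commands
        # are assumed to be stored one per file
        return cPickle.load(fil)
    finally:
        fil.close()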
Example #2
0
def cmd_gencdf(argv):
    knownvalues = ['hdrsizes']

    def usage(cmdname):
        print 'usage: %s <whichvalue> [-n ...] [--bucket-size ...] dir [dir ...]' % (cmdname)
        print '         <whichvalue> must be one of the following: ' + ', '.join(knownvalues)
        print '         -n integer: process only the first N data points instead of all available ones; it is an error if the data contains fewer than N data points.'
        print '         --bucket-size integer: instead of outputting individual data points, group them into buckets of the specified size; each bucket is represented by its upper bound. This reduces the number of data points in the output.'
        print '         each dir should contain only partial-result pickle files.'
        return
    ####

    argvidx = 0
    cmdname = argv[argvidx]
    assert cmdname == 'gencdf'
    if len(argv) < 3:
        usage(cmdname)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    whichvalue = argv[argvidx]
    print whichvalue
    if whichvalue not in knownvalues:
        print '<whichvalue> must be one of the following:\n' + '\n'.join(knownvalues)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    opts, args = getopt.getopt(argv[argvidx:], '')

    ###

    dirpaths = args

    hdrLenCounts = {}

    for dirpath in dirpaths:
        filenames = os.listdir(dirpath)
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)

            pr = utils.unpickleStuff(filepath)

            for hdrLen, count in pr.hdrLenCounts.iteritems():
                hdrLenCounts[hdrLen] = hdrLenCounts.get(hdrLen, 0) + count
                pass
            pass
        pass

    totalCount = sum(hdrLenCounts.values())

    hdrLens = sorted(hdrLenCounts.keys())

    fil = open('cdfCodec4', 'w')
    fil.write('# total count: %u\n' % (totalCount))
    cumulativeCount = 0
    for hdrLen in hdrLens:
        cumulativeCount += hdrLenCounts[hdrLen]
        fraction = float(cumulativeCount) / totalCount
        fil.write('%u\t%f  # count of this value: %u\n' % (hdrLen, fraction, hdrLenCounts[hdrLen]))
        pass
    fil.close()
    return
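
# The variants below delegate output to a genCDF helper instead of writing
# the file inline as above. genCDF is not shown in this listing; the sketch
# below is an assumption that mirrors the inline cumulative-fraction loop
# above and the bucketing behaviour described in the usage text (each bucket
# represented by its upper bound).

import math

def genCDF(sortedValues, outputFilename, bucketsize=None):
    totalCount = len(sortedValues)
    fil = open(outputFilename, 'w')
    fil.write('# total count: %u\n' % (totalCount))
    cumulativeCount = 0
    if bucketsize is None:
        # one output line per data point
        for value in sortedValues:
            cumulativeCount += 1
            fil.write('%s\t%f\n' % (repr(value),
                                    float(cumulativeCount) / totalCount))
            pass
        pass
    else:
        # group sorted values into buckets of width bucketsize; emit one
        # line per bucket, labelled with the bucket's upper bound
        curUpperBound = None
        for value in sortedValues:
            upperBound = math.ceil(float(value) / bucketsize) * bucketsize
            if (curUpperBound is not None) and (upperBound != curUpperBound):
                fil.write('%f\t%f\n' % (curUpperBound,
                                        float(cumulativeCount) / totalCount))
                pass
            curUpperBound = upperBound
            cumulativeCount += 1
            pass
        if curUpperBound is not None:
            fil.write('%f\t%f\n' % (curUpperBound,
                                    float(cumulativeCount) / totalCount))
            pass
        pass
    fil.close()
    return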
Example #3
0
def cmd_gencdf(argv):
    knownvalues = ['detourDistribution']

    def usage(cmdname):
        print 'usage: %s <whichvalue> [-n ...] [--bucket-size ...] <pickle file path> [<pickle file path> ...]' % (cmdname)
        print '         <whichvalue> must be one of the following: ' + ', '.join(knownvalues)
        print '         -n integer: process only the first N data points instead of all available ones; it is an error if the data contains fewer than N data points.'
        print '         --bucket-size integer: instead of outputting individual data points, group them into buckets of the specified size; each bucket is represented by its upper bound. This reduces the number of data points in the output.'
        return
    ####

    argvidx = 0
    cmdname = argv[argvidx]
    assert cmdname == 'gencdf'
    if len(argv) < 3:
        usage(cmdname)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    whichvalue = argv[argvidx]
    print whichvalue
    if whichvalue not in knownvalues:
        print '<whichvalue> must be one of the following:\n' + '\n'.join(knownvalues)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    bucketsize = None
    exactlyNumDataPoints = None

    opts, args = getopt.getopt(argv[argvidx:], 'n:',
                               ['bucket-size=', ])

    ## parse options
    for o, a in opts:
        if o == '-n':
            exactlyNumDataPoints = int(a)
            assert exactlyNumDataPoints > 0
            pass
        elif o == '--bucket-size':
            bucketsize = int(a)
            assert bucketsize > 0
            pass
        pass

    ###

    filenames = args

    percentlist = []

    numdatapointssofar = 0
    for filename in filenames:
        evalResult3 = utils.unpickleStuff(filename)

        # make sure the revisions of the obj match ours

        for fileResult3 in evalResult3.fileResults3:
            for srcDstPairResult3 in fileResult3.srcDstPairResults3.values():
                if srcDstPairResult3 is None:
                    # might be None if the (s,d) pair was disconnected
                    continue
                numdatapointssofar += 1
                if whichvalue == 'detourDistribution':
                    percentlist.append(srcDstPairResult3.fractionWithDetour * 100)
                    pass

                # have we collected enough?
                if exactlyNumDataPoints and numdatapointssofar >= exactlyNumDataPoints:
                    break
                pass
            if exactlyNumDataPoints and numdatapointssofar >= exactlyNumDataPoints:
                # propagate the stop outwards: a single 'break' exits only
                # the innermost loop
                break
            pass
        if exactlyNumDataPoints and numdatapointssofar >= exactlyNumDataPoints:
            break
        pass

    # finished gathering the data
    if exactlyNumDataPoints and (numdatapointssofar < exactlyNumDataPoints):
        raise Exception('found only %u data points.' % (numdatapointssofar))

    genCDF(sorted(percentlist), 'cdfDetourDistribution', bucketsize=bucketsize)

    return
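
# The nested loops above (and in the hdrsizes variant that follows) assume a
# pickled object shaped roughly as follows; the names come from the attribute
# accesses in the code, not from class definitions shown in this listing:
#
#   evalResult3.fileResults3             - a list of per-file results
#   fileResult3.srcDstPairResults3       - a dict keyed by (src, dst) pairs;
#                                          a value may be None if the pair
#                                          was disconnected
#   srcDstPairResult3.fractionWithDetour - a fraction in [0, 1], scaled to a
#                                          percentage before being appended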
Example #4
0
def cmd_gencdf(argv):
    knownvalues = ['hdrsizes']

    def usage(cmdname):
        print 'usage: %s <whichvalue> [-n ...] [--bucket-size ...] <result file path> [<result file path> ...]' % (cmdname)
        print '         <whichvalue> must be one of the following: ' + ', '.join(knownvalues)
        print '         -n integer: process only the first N data points instead of all available ones; it is an error if the data contains fewer than N data points.'
        print '         --bucket-size integer: instead of outputting individual data points, group them into buckets of the specified size; each bucket is represented by its upper bound. This reduces the number of data points in the output.'
        return
    ####

    argvidx = 0
    cmdname = argv[argvidx]
    assert cmdname == 'gencdf'
    if len(argv) < 3:
        usage(cmdname)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    whichvalue = argv[argvidx]
    print whichvalue
    if whichvalue not in knownvalues:
        print '<whichvalue> must be one of the following:\n' + '\n'.join(knownvalues)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    bucketsize = None
    exactlyNumDataPoints = None

    opts, args = getopt.getopt(argv[argvidx:], 'n:',
                               ['bucket-size=', ])

    ## parse options
    for o, a in opts:
        if o == '-n':
            exactlyNumDataPoints = int(a)
            assert exactlyNumDataPoints > 0
            pass
        elif o == '--bucket-size':
            bucketsize = int(a)
            assert bucketsize > 0
            pass
        pass

    ###

    filenames = args

    hdrLen2NormalLists = {}
    hdrLen2SmallerLists = {}
    for offsetPtrAlignment in offsetPtrAlignments:
        hdrLen2NormalLists[offsetPtrAlignment] = []
        hdrLen2SmallerLists[offsetPtrAlignment] = []
        pass
    dagsizeNormal = []
    dagsizeSmaller = []

    numdatapointssofar = 0
    for filename in filenames:
        evalResult3 = utils.unpickleStuff(filename)

        # make sure the revisions of the obj match ours

        for fileResult3 in evalResult3.fileResults3:
            for srcDstPairResult3 in fileResult3.srcDstPairResults3.values():
                if srcDstPairResult3 is None:
                    # might be None if the (s,d) pair was disconnected
                    continue
                numdatapointssofar += 1
                if whichvalue == 'hdrsizes':
                    # assume that these offsetPtrAlignment are same as
                    # we expected them
                    for offsetPtrAlignment, hdrlen in srcDstPairResult3.hdrLens2Normal.iteritems():
                        hdrLen2NormalLists[offsetPtrAlignment].append(
                            float(hdrlen) / (8.0 / offsetPtrAlignment))
                        pass

                    for offsetPtrAlignment, hdrlen in srcDstPairResult3.hdrLens2Smaller.iteritems():
                        hdrLen2SmallerLists[offsetPtrAlignment].append(
                            float(hdrlen) / (8.0 / offsetPtrAlignment))
                        pass

                    dagsizeNormal.append(srcDstPairResult3.dagsizeNormal)
                    dagsizeSmaller.append(srcDstPairResult3.dagsizeSmaller)
                    pass

                # have we collected enough?
                if exactlyNumDataPoints and numdatapointssofar >= exactlyNumDataPoints:
                    break
                pass
            if exactlyNumDataPoints and numdatapointssofar >= exactlyNumDataPoints:
                # propagate the stop outwards: a single 'break' exits only
                # the innermost loop
                break
            pass
        if exactlyNumDataPoints and numdatapointssofar >= exactlyNumDataPoints:
            break
        pass

    # finished gathering the data
    if exactlyNumDataPoints and (numdatapointssofar < exactlyNumDataPoints):
        raise Exception('found only %u data points.' % (numdatapointssofar))

    for offsetPtrAlignment in offsetPtrAlignments:
        hdrLen2NormalLists[offsetPtrAlignment].sort()
        hdrLen2SmallerLists[offsetPtrAlignment].sort()

        genCDF(hdrLen2NormalLists[offsetPtrAlignment],
               'cdfGetHeader2NormalForAlignment%d' % offsetPtrAlignment,
               bucketsize=bucketsize)
        genCDF(hdrLen2SmallerLists[offsetPtrAlignment],
               'cdfGetHeader2SmallerForAlignment%d' % offsetPtrAlignment,
               bucketsize=bucketsize)

        pass

    dagsizeNormal.sort()
    dagsizeSmaller.sort()

    genCDF(dagsizeNormal, 'cdfdagsizenormal', bucketsize=bucketsize)
    genCDF(dagsizeSmaller, 'cdfdagsizesmaller', bucketsize=bucketsize)

    return
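
# A note on the normalization above: dividing hdrlen by
# (8.0 / offsetPtrAlignment) is the same as hdrlen * offsetPtrAlignment / 8.
# One plausible reading (an assumption, since the pickled fields are not
# documented in this listing) is that hdrlen counts offset pointers of
# offsetPtrAlignment bits each, making the appended value a length in bytes;
# e.g. 16 pointers at alignment 4 give 16 / (8.0 / 4) = 8.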
Example #5
0
def cmd_gencdf(argv):
    knownvalues = ['values', 'diffs']

    def usage(cmdname):
        print 'usage: %s <whichvalue> [--bucket-size ...] <result file path> [<result file path> ...]' % (cmdname)
        print '         <whichvalue> must be one of the following: ' + ', '.join(knownvalues)
        print '         --bucket-size integer: instead of outputting individual data points, group them into buckets of the specified size; each bucket is represented by its upper bound. This reduces the number of data points in the output.'
        return
    ####

    argvidx = 0
    cmdname = argv[argvidx]
    assert cmdname == 'gencdf'
    if len(argv) < 3:
        usage(cmdname)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    whichvalue = argv[argvidx]
    print whichvalue
    if whichvalue not in knownvalues:
        print '<whichvalue> must be one of the following:\n' + '\n'.join(knownvalues)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    bucketsize = None
    if argv[argvidx] == '--bucket-size':
        bucketsize = int(argv[argvidx + 1])
        assert bucketsize > 0
        argvidx += 2
        pass
    ###

    filenames = argv[argvidx:]

    if whichvalue == 'values':
        lowerBounds = []
        upperBounds = []
        actualSizes = []
        pass
    elif whichvalue == 'diffs':
        diffsWithLowerBounds = []
        diffsWithUpperBounds = []
        pass

    for filename in filenames:
        evalResult3 = utils.unpickleStuff(filename)

        # make sure the revisions of the obj match ours

        for fileResult3 in evalResult3.fileResults3:
            for srcDstPairResult3 in fileResult3.srcDstPairResults3.values():
                if srcDstPairResult3 is None:
                    # might be None if the (s,d) pair was disconnected
                    continue
                if whichvalue == 'values':
                    lowerBounds.append(srcDstPairResult3.lowerBound)
                    if srcDstPairResult3.upperBound is not None:
                        upperBounds.append(srcDstPairResult3.upperBound)
                        pass
                    actualSizes.append(srcDstPairResult3.numEdges)
                    pass
                elif whichvalue == 'diffs':
                    diffsWithLowerBounds.append(srcDstPairResult3.numEdges - srcDstPairResult3.lowerBound)
                    if srcDstPairResult3.upperBound is not None:
                        diffsWithUpperBounds.append(srcDstPairResult3.upperBound - srcDstPairResult3.numEdges)
                        pass
                    pass
                pass
            pass
        pass

    # finished gathering the data

    if whichvalue == 'values':
        genCDF(sorted(lowerBounds), 'cdflowerbounds', bucketsize=bucketsize)
        genCDF(sorted(upperBounds), 'cdfupperbounds', bucketsize=bucketsize)
        genCDF(sorted(actualSizes), 'cdfactualsizes', bucketsize=bucketsize)
        pass
    elif whichvalue == 'diffs':
        genCDF(sorted(diffsWithLowerBounds), 'cdfdiffswithlowerbounds', bucketsize=bucketsize)
        genCDF(sorted(diffsWithUpperBounds), 'cdfdiffswithupperbounds', bucketsize=bucketsize)
        pass

    return
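
# Hypothetical invocations of the variant above (file names are
# placeholders):
#
#   cmd_gencdf(['gencdf', 'values', '--bucket-size', '10', 'res.pickle'])
#       writes cdflowerbounds, cdfupperbounds, and cdfactualsizes
#
#   cmd_gencdf(['gencdf', 'diffs', 'res.pickle'])
#       writes cdfdiffswithlowerbounds and cdfdiffswithupperbounds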
Example #6
0
def cmd_gencdf(argv):
    knownvalues = ['maxStretch', 'timeStretchDownTo1']

    def usage(cmdname):
        print 'usage: %s <whichvalue> [--bucket-size ...] <result file path> [<result file path> ...]' % (cmdname)
        print '         <whichvalue> must be one of the following: ' + ', '.join(knownvalues)
        print '         --bucket-size integer: instead of outputting individual data points, group them into buckets of the specified size; each bucket is represented by its upper bound. This reduces the number of data points in the output.'
        return
    ####

    argvidx = 0
    cmdname = argv[argvidx]
    assert cmdname == 'gencdf'
    if len(argv) < 3:
        usage(cmdname)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    whichvalue = argv[argvidx]
    print whichvalue
    if whichvalue not in knownvalues:
        print '<whichvalue> must be one of the following:\n' + '\n'.join(knownvalues)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    bucketsize = None
    if argv[argvidx] == '--bucket-size':
        bucketsize = float(argv[argvidx + 1])
        assert bucketsize > 0
        argvidx += 2
        pass
    ###

    filenames = argv[argvidx:]

    dataPoints = []

    for filename in filenames:
        calculatorOutput = utils.unpickleStuff(filename)

        # make sure the revisions of the obj match ours

        for timeAfterFailure2Stretch in calculatorOutput:
            if whichvalue == 'maxStretch':
                maxStretch = max(map(lambda x: x[1], timeAfterFailure2Stretch))
                dataPoints.append(maxStretch)
                pass
            elif whichvalue == 'timeStretchDownTo1':
                timeStretchDownTo1 = filter(lambda x: x[1] == 1, timeAfterFailure2Stretch)[0][0]
                dataPoints.append(timeStretchDownTo1)
                pass
            pass
        pass

    # finished gathering the data
    dataPoints.sort()
    if whichvalue == 'maxStretch':
        genCDF(dataPoints, 'cdfmaxstretch', bucketsize=bucketsize)
        pass
    elif whichvalue == 'timeStretchDownTo1':
        genCDF(dataPoints, 'timeStretchDownTo1', bucketsize=bucketsize)
        pass

    return
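
# The calculatorOutput unpickled above is assumed to be an iterable of
# per-failure traces, each a list of (timeAfterFailure, stretch) pairs,
# presumably ordered by time. 'maxStretch' records the worst stretch within
# each trace; 'timeStretchDownTo1' records the first time at which the
# stretch returns to 1, and raises an IndexError if a trace never reaches
# stretch 1.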
Example #7
0
def cmd_gencdf(argv):
    knownvalues = ['all']

    def usage(cmdname):
        print 'usage: %s <whichvalue> [-n ...] [--bucket-size ...] dir [dir ...]' % (cmdname)
        print '         <whichvalue> must be one of the following: ' + ', '.join(knownvalues)
        print '         -n integer: process only the first N data points instead of all available ones; it is an error if the data contains fewer than N data points.'
        print '         --bucket-size integer: instead of outputting individual data points, group them into buckets of the specified size; each bucket is represented by its upper bound. This reduces the number of data points in the output.'
        return
    ####

    argvidx = 0
    cmdname = argv[argvidx]
    assert cmdname == 'gencdf'
    if len(argv) < 3:
        usage(cmdname)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    whichvalue = argv[argvidx]
    print whichvalue
    if whichvalue not in knownvalues:
        print '<whichvalue> must be one of the following:\n' + '\n'.join(knownvalues)
        sys.exit(-1)
        pass
    argvidx += 1
    ###

    bucketsize = None
    exactlyNumDataPoints = None

    opts, args = getopt.getopt(argv[argvidx:], 'n:',
                               ['bucket-size=', ])

    ## parse options
    for o, a in opts:
        if o == '-n':
            # -n is accepted here so getopt recognizes it, but its handling
            # is not implemented for this command
            raise Exception('option -n is not yet supported')
        elif o == '--bucket-size':
            bucketsize = int(a)
            assert bucketsize > 0
            pass
        pass

    ###

    def updateCounts(curValueToCounts, moreValueToCounts):
        for value, count in moreValueToCounts.iteritems():
            curValueToCounts[value] = curValueToCounts.get(value, 0) + count
            pass
        return
    ###

    lowerBoundCounts = {}
    dagSize_stretchToCounts = {}
    codec2HdrLen_stretchToCounts = {}
    codec4HdrLen_stretchToCounts = {}
    singlePath_encodingLen_counts = {}
    for stretch in stretches:
        dagSize_stretchToCounts[stretch] = {}
        codec2HdrLen_stretchToCounts[stretch] = {}
        codec4HdrLen_stretchToCounts[stretch] = {}
        pass

    dirpaths = args

    for dirpath in dirpaths:
        filenames = os.listdir(dirpath)
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)

            pr = utils.unpickleStuff(filepath)

            updateCounts(lowerBoundCounts, pr.lowerBoundCounts)
            updateCounts(
                singlePath_encodingLen_counts, pr.singlePath_encodingLen_counts)

            for stretch in stretches:
                updateCounts(
                    dagSize_stretchToCounts[stretch],
                    pr.dagSize_stretchToCounts[stretch])
                updateCounts(
                    codec2HdrLen_stretchToCounts[stretch],
                    pr.codec2HdrLen_stretchToCounts[stretch])
                updateCounts(
                    codec4HdrLen_stretchToCounts[stretch],
                    pr.codec4HdrLen_stretchToCounts[stretch])
                pass
            pass
        pass

    genCDFFromCounts(lowerBoundCounts, 'cdf_lowerBound')
    genCDFFromCounts(
        singlePath_encodingLen_counts, 'cdf_singlePath_encodingLen')

    for stretch in stretches:
        genCDFFromCounts(
            dagSize_stretchToCounts[stretch],
            'cdf_dagSize_stretch_%u' % (stretch))
        genCDFFromCounts(
            codec2HdrLen_stretchToCounts[stretch],
            'cdf_codec2HdrLen_stretch_%u' % (stretch))
        genCDFFromCounts(
            codec4HdrLen_stretchToCounts[stretch],
            'cdf_codec4HdrLen_stretch_%u' % (stretch))
        pass

    return
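
# genCDFFromCounts consumes the value->count dicts directly instead of
# expanded lists. The helper is not shown in this listing; a minimal sketch,
# assuming integer values and mirroring the cumulative-count loop that the
# first cmd_gencdf variant writes inline:

def genCDFFromCounts(valueToCounts, outputFilename):
    totalCount = sum(valueToCounts.values())
    fil = open(outputFilename, 'w')
    fil.write('# total count: %u\n' % (totalCount))
    cumulativeCount = 0
    for value in sorted(valueToCounts.keys()):
        cumulativeCount += valueToCounts[value]
        fraction = float(cumulativeCount) / totalCount
        fil.write('%u\t%f  # count of this value: %u\n' % (
            value, fraction, valueToCounts[value]))
        pass
    fil.close()
    return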
Example #8
0
def cmd_showPairsWithDagSizeSmallerThanLowerBound(argv):
    argvidx = 0
    cmdname = argv[argvidx]
    argvidx += 1
    assert 'cmd_' + cmdname == inspect.stack()[0][3]

    showDetails = False

    opts, args = getopt.getopt(argv[argvidx:], '',
                               ['showDetails', ])
    ## parse options
    for o, a in opts:
        if o == '--showDetails':
            showDetails = True
            pass
        pass

    dirpaths = args
    assert len(dirpaths) > 0

    curGraphFilePath = None

    for dirpath in dirpaths:
        filenames = os.listdir(dirpath)
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)

            pr = utils.unpickleStuff(filepath)

            if showDetails and (pr.filename != curGraphFilePath):
                g, _ = utils.textToG(pr.filename, useInt=False,
                                     ignoreWeights=not pr.weighted)
                # remember which graph is loaded, so the same graph file is
                # not re-parsed for every partial result file
                curGraphFilePath = pr.filename

                # calculate M for computeBounds()
                if pr.weighted:
                    weights = map(lambda (u, v, edgeData): edgeData['weight'],
                                  g.edges(data=True))
                    maxWeight = max(weights)
                    minWeight = min(weights)
                    assert minWeight > 0
                    M = float(maxWeight) / float(minWeight)
                    pass
                else:
                    M = float(1)
                    pass
                pass

            for stretch in stretches:
                for (s, d) in pr.pairsWithDagSizeSmallerThanLowerBound[stretch]:
                    if showDetails:
                        pp, dp = getDgWithStretch(g, s, d, pr.weighted, stretch)
                        if dp is None:
                            dp = {}
                            pass
                        dag, virtualDetourPaths = approach2.getDagWithVnodes(
                            pp, dp, returnDetourPathsWithVNodes=True)
                        lowerBound = computeBounds(g, s, d, pp, M, pr.weighted)
                        print 's,d=%s,%s; #OfEdges(pp)=%u, #OfEdges(dps)=%u, lowerBound=%u, dagSize=%u' % (
                            repr(s), repr(d), len(pp)-1,
                            sum(map(lambda p: len(p) - 1, dp.values())),
                            lowerBound, dag.number_of_edges()
                            )
                        pass
                    else:
                        print 's,d=%s,%s' % (repr(s), repr(d))
                        pass
                    pass
                pass
            pass
        pass
    return
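
# A hypothetical invocation (the directory name is a placeholder). Note that
# argv[0] must be the command name without the 'cmd_' prefix, which the
# inspect-based assert at the top of the function verifies:
#
#   cmd_showPairsWithDagSizeSmallerThanLowerBound(
#       ['showPairsWithDagSizeSmallerThanLowerBound', '--showDetails',
#        'partial_results_dir'])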