def GenerateExactMRCFromTrace(exactTraceName):

    """
    -------------------------------------------------------
    Build the exact MRC histogram from the output of a Parda
    trace analysis.

    The resulting histogram is the reference curve against which
    the curves produced by the SHARDS implementation are compared.

    File layout, as consumed by this parser:
      * FIRST_LINE header lines, which are skipped;
      * zero or more "<stack depth> <count>" records, each of
        which starts with a digit;
      * one line that terminates the records (blank, or not
        starting with a digit), which is discarded;
      * one more line whose second whitespace-separated field is
        the count for the infinite stack depth.

    Parameters:
        exactTraceName - name of the Parda output file, looked up
                         in the "Traces" directory one level above
                         the current working directory.
    Returns:
        The Histogram holding one bucket per record plus bucket -1
        for the infinite stack depth.
    Raises:
        OSError    - the file cannot be opened.
        IndexError - a record or the final line has fewer than
                     two fields.
        ValueError - a depth or count is not an integer.
    -------------------------------------------------------
    """

    FIRST_LINE = 2
    PATH_TO_TRACE_DIR = os.path.normpath(os.path.join(os.getcwd(), ".."))
    tracePath = os.path.join(PATH_TO_TRACE_DIR, "Traces", exactTraceName)

    actualMRCHistogram = Histogram()

    # The context manager guarantees the file is closed, even when a
    # malformed record makes the parsing below raise.
    with open(tracePath, "r", encoding="utf-8") as fp:
        # Skip the header lines
        for _ in range(FIRST_LINE):
            fp.readline()

        # Every record that starts with a digit is "<stack depth> <count>"
        thisRecord = fp.readline().strip()
        while thisRecord != "" and thisRecord[0].isdigit():
            fields = thisRecord.split()
            actualMRCHistogram.AddBucket(int(fields[0]), int(fields[1]))
            thisRecord = fp.readline().strip()

        # The line that ended the loop is dropped; the line after it
        # carries the infinite stack depth count in its second field.
        infiniteRecord = fp.readline().split()
        actualMRCHistogram.AddBucket(-1, int(infiniteRecord[1]))

    return actualMRCHistogram
def ClassicLRUSHARDS(traceName):

    """
    Approximate the LRU stack-distance histogram of a trace with
    fixed-rate SHARDS sampling.

    Every line of the trace is one reference; only its first
    INDEX_OF_LAST_CHAR_IN_REF characters are used as the key. A reference
    is processed only when its hash falls below the sampling threshold.
    The first sampled occurrence of a key is recorded in bucket -1
    ("infinite" stack depth); later occurrences are recorded at their
    stack distance in the LRU tree divided by the sampling rate.

    The running count of references read is printed every 10000 lines.

    Parameters:
        traceName - name of the trace file inside the "Traces" directory
                    under PATH_TO_TRACE_DIR.
    Returns:
        The Histogram of rescaled stack distances.
    Raises:
        OSError - the trace file cannot be opened.
    """
    #Sampling rate, in percent
    SAMPLE_RATE = 0.5
    #A modulus of 100 cannot express a fractional percentage: with integer
    #residues, "hash % 100 < 0.5" only accepts residue 0, which samples 1% of
    #the keys while the distances are rescaled for 0.5%. A finer modulus keeps
    #the filter and the rescale factor in agreement.
    HASH_MODULUS = 10000
    sampleThreshold = SAMPLE_RATE * HASH_MODULUS / 100
    #Fraction of references sampled; distances are divided by it
    rescaleFactor = SAMPLE_RATE / 100

    mySampleSet = SampleSet(S_MAX)
    SHARDSHistogram = Histogram()
    myDistanceTree = LRUTree()
    i = 0

    tracePath = os.path.join(PATH_TO_TRACE_DIR, "Traces", traceName)

    #The context manager closes the trace even if processing raises
    with open(tracePath, "r", encoding="utf-8") as fp:

        #A blank line (or end of file) terminates the trace
        thisReference = fp.readline().strip()
        while thisReference != "":
            i += 1
            thisReference = thisReference[0:INDEX_OF_LAST_CHAR_IN_REF]
            referenceHash = hash(thisReference)

            #We only sample those disk references which satisfy our sampling condition
            if (referenceHash % HASH_MODULUS) < sampleThreshold:

                #FindElement's return type is not visible from here, so the
                #explicit comparison with False is kept as written.
                if mySampleSet.FindElement(thisReference) == False:
                    #First sampled occurrence: track it in the sample set and the tree
                    mySampleSet.InsertElement(thisReference, referenceHash)
                    myDistanceTree.InsertElement(thisReference)
                    #A miss occurred ("infinite" stack depth), record it in the histogram
                    SHARDSHistogram.IncrementBucket(-1)
                else:
                    #Since the address is already in the sample set, it is also in the tree.
                    #Get the stack depth and scale it up to the full trace.
                    stackDistanceOfThisReference = myDistanceTree.GetDistanceOfElement(thisReference)
                    stackDistanceOfThisReference = round(stackDistanceOfThisReference / rescaleFactor)

                    #Remove it from the stack and re-push it (it is now the most recent element)
                    myDistanceTree.RemoveElement(thisReference)
                    myDistanceTree.InsertElement(thisReference)

                    #Update the histogram with the old stack depth of thisReference
                    if SHARDSHistogram.BucketInHistogram(stackDistanceOfThisReference):
                        SHARDSHistogram.IncrementBucket(stackDistanceOfThisReference)
                    else:
                        SHARDSHistogram.AddBucket(stackDistanceOfThisReference, 1)

            #Progress indicator
            if i % 10000 == 0:
                print(i)
            thisReference = fp.readline().strip()

    return SHARDSHistogram
# Example #3
# 0
def Worker(myWorkerId, fileOffset, lengthToProcess, globalProcessReferenceDict,
           histogramList, fileName):
    """
    Run SHARDS over one partition of a trace and publish the partial histogram.

    The worker skips fileOffset lines of the trace, then processes up to
    lengthToProcess lines (stopping early at a blank line or end of file).
    Only the first INDEX_OF_LAST_CHAR_IN_REF characters of a line are used
    as the reference key, and only references whose hash satisfies the
    sampling condition are processed.

    A sampled reference that is not yet in this worker's sample set is a
    LOCAL miss. If globalProcessReferenceDict shows that a worker with a
    smaller id touched it, an approximate distance is recorded; otherwise
    it is recorded in bucket -1 ("infinite" stack depth). References that
    are already in the sample set are recorded at their rescaled distance
    in the local LRU tree.

    Parameters:
        myWorkerId                 - id of this worker; compared with the ids
                                     stored in globalProcessReferenceDict.
        fileOffset                 - number of trace lines to skip first.
        lengthToProcess            - maximum number of lines to process.
        globalProcessReferenceDict - shared mapping, reference -> id of the
                                     worker that last recorded it. Updated
                                     for every sampled reference.
        histogramList              - shared list; the buckets of the partial
                                     histogram are appended to it.
        fileName                   - name of the trace file inside the
                                     "Traces" directory under
                                     PATH_TO_TRACE_DIR.
    Returns:
        None. The result is delivered through histogramList.
    Raises:
        OSError - the trace file cannot be opened.

    NOTE(review): str hashes are salted per interpreter process unless
    PYTHONHASHSEED is fixed; workers that do not share a seed would sample
    different references - confirm how the workers are started.
    """

    mySampleSet = SampleSet(S_MAX)
    SHARDSHistogram = Histogram()
    myDistanceTree = LRUTree()

    #SAMPLE_RATE is a percentage; distances are divided by this fraction
    rescaleFactor = SAMPLE_RATE / 100

    tracePath = os.path.join(PATH_TO_TRACE_DIR, "Traces", fileName)

    #The context manager closes the trace even if processing raises
    with open(tracePath, "r", encoding="utf-8") as fp:

        #Position the file pointer: after this loop thisReference holds the
        #first line of this worker's partition.
        linesSkipped = 0
        thisReference = fp.readline().strip()
        while thisReference != "" and linesSkipped < fileOffset:
            linesSkipped += 1
            thisReference = fp.readline().strip()

        #Process the desired partition of the trace. The line read above is
        #processed as-is; reading another one here would silently drop the
        #first reference of every partition.
        currentOffset = 0
        while thisReference != "" and currentOffset < lengthToProcess:

            thisReference = thisReference[0:INDEX_OF_LAST_CHAR_IN_REF]
            referenceHash = hash(thisReference)

            #We only sample those disk references which satisfy our sampling condition
            if (referenceHash % 100) < SAMPLE_RATE:

                #FindElement's return type is not visible from here, so the
                #explicit comparison with False is kept as written.
                if mySampleSet.FindElement(thisReference) == False:
                    #LOCAL infinity: find out whether another worker has
                    #already recorded this reference.
                    try:
                        lastProcessToAccess = globalProcessReferenceDict[
                            thisReference]
                    except KeyError:
                        lastProcessToAccess = None

                    #Either way the reference is new to this worker, so it
                    #must enter the local sample set and distance tree;
                    #otherwise its next access would be a local miss again.
                    mySampleSet.InsertElement(thisReference, referenceHash)
                    myDistanceTree.InsertElement(thisReference)

                    if (lastProcessToAccess is not None
                            and lastProcessToAccess < myWorkerId):
                        #Estimate the distance back to the earlier worker:
                        #the full partitions strictly in between, our
                        #position in this partition, and half a partition
                        #for the unknown position inside the earlier one.
                        approximateDepth = (
                            (myWorkerId - lastProcessToAccess - 1) *
                            lengthToProcess) + currentOffset + (
                                0.5 * lengthToProcess)

                        #NOTE(review): approximateDepth is counted in raw
                        #trace lines, not in sampled references. The
                        #original code applied the rescale step in this
                        #branch too, so it is kept - confirm it is intended.
                        _RecordStackDistance(
                            SHARDSHistogram,
                            round(approximateDepth / rescaleFactor))
                    else:
                        #It is really a global infinity: a miss occurred
                        #("infinite" stack depth), record it in the histogram
                        SHARDSHistogram.IncrementBucket(-1)

                else:
                    #Since the address is already in the sample set, it is
                    #also in the tree. Get the stack depth and rescale it.
                    stackDistanceOfThisReference = myDistanceTree.GetDistanceOfElement(
                        thisReference)
                    stackDistanceOfThisReference = round(
                        stackDistanceOfThisReference / rescaleFactor)

                    #Remove it from the stack and re-push it (it is now the
                    #most recent element)
                    myDistanceTree.RemoveElement(thisReference)
                    myDistanceTree.InsertElement(thisReference)

                    #Update the histogram with the old stack depth
                    _RecordStackDistance(SHARDSHistogram,
                                         stackDistanceOfThisReference)

                #We need to update the global dictionary
                globalProcessReferenceDict[thisReference] = myWorkerId

            thisReference = fp.readline().strip()
            currentOffset += 1

    print("Thread {0}. Processed {1} references".format(
        myWorkerId, currentOffset))
    #Update the manager with the partial histogram
    histogramList.append(SHARDSHistogram.GetBuckets())
    return


def _RecordStackDistance(histogram, stackDistance):
    """Count one reference at stackDistance, creating the bucket if needed."""
    if histogram.BucketInHistogram(stackDistance):
        histogram.IncrementBucket(stackDistance)
    else:
        histogram.AddBucket(stackDistance, 1)