def fntMatchValue(mysLine, mydVar):
    '''\
    Extract value from line according to valueregex for var.
    If no value found, supply suitably disappointing string.
    Get the right word from the line.
    If asked for word zero, use the whole line.
    Makes the extraction harder, but sometimes necessary.
    '''
    sWordnumber = mydVar["wordnumber"]
    nWordnumber = int(sWordnumber)
    lWords = mysLine.split()
    if nWordnumber == 0:
        sWord = mysLine
    elif nWordnumber <= len(lWords):
        sWord = lWords[nWordnumber - 1]
    else:
        sWord = "nowordhere_indexoutofrange"
    sValueregex = mydVar["valueregex"]
    sVarname = mydVar["varname"]
    oMatch = re.search(sValueregex, sWord)
    NTRC.tracef(
        5, "MCHV",
        "proc MatchValue matching word var|%s| word|%s| valueregex|%s| matchobj|%s|"
        % (sVarname, sWord, sValueregex, oMatch))
    if oMatch:
        # Word matches the valueregex.  Save the value.
        sValue = oMatch.group(1)
        NTRC.tracef(3, "MCHV",
                    "proc addvalue name|%s| val|%s|" % (sVarname, sValue))
    else:
        # If not found, at least supply something conspicuous for printing.
        sValue = "novaluefound"
    return (sVarname, sValue)
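# Hedged usage sketch for fntMatchValue (hypothetical descriptor; real ones
#  come from the =variables csv read by fnldParseInput below):
#   dVarDemo = {"varname": "nerrors", "wordnumber": "2",
#               "valueregex": r"(\d+)"}
#   fntMatchValue("errors 42 total", dVarDemo)  # -> ("nerrors", "42")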
    def mDestroyCopy(self, mysCopyID):
        try:
            nCopyIndex = self.lCopyIDs.index(mysCopyID)
        except ValueError:
            NTRC.tracef(
                0, "SHLF",
                "BUGCHECK copyID not found for removal|%s|" % (mysCopyID))
            return False
        # Remove doc and copy from current lists.
        del self.lCopyIDs[nCopyIndex]
        del self.lDocIDs[nCopyIndex]
        # Tell the server that the copy is gone.
        cCopy = G.dID2Copy[mysCopyID]
        sDocID = cCopy.sDocID
        self.cServer.mDestroyCopy(mysCopyID, sDocID, self.ID)
        # And give back the space it occupied.
        self.bContig = False
        cDoc = G.dID2Document[sDocID]

        # BZZZT: DO NOT put this region back into use.  It has already
        # suffered an error once and caused a document to fail.
        #self.nFreeSpace += cDoc.nSize
        NTRC.tracef(
            3, "SHLF", "proc mDestroyCopy remove doc|%s| copy|%s| "
            "idx|%d| size|%d| from shelf|%s| remainingdocs|%d| free|%d|" %
            (cCopy.sDocID, mysCopyID, nCopyIndex, cDoc.nSize, self.ID,
             len(self.lCopyIDs), self.nFreeSpace))
        # And, at long last, drop the local reference to the Copy object.
        #  (The object must remain in G.dID2Copy because historical lookups
        #  via lCopyIDsComplete, e.g. in mSelectVictimCopy, still need it.)
        del cCopy
        return self.ID + "-" + sDocID + "-" + mysCopyID
    def fndFormatQuery(self, mydCli, myg):
        '''
        Take all the CLI options that might specify a searchable attribute, and 
         construct a MongoDB or searchspace query dictionary.  
         This is lots nastier than it first appears to be
         because json is so bloody picky.
        '''
        dOut = dict()
        for sAttrib, sValue in mydCli.items():
            result = None
            if sValue is not None:
                # Is it something valid in json?
                try:
                    result = json.loads(sValue)
                except ValueError:
                    # Is it a string that should be an integer, ok in json?
                    try:
                        result = int(sValue)
                    except:
                        # Is it a naked string for some string-valued var
                        #  that isn't just Y/N or a mandatory string?
                        #  Rule out dict values that are already formatted.
                        if (isinstance(sValue, str)
                                and sAttrib not in myg.lYesNoOptions
                                and sAttrib not in myg.lMandatoryArgs
                                and '{' not in sValue and '}' not in sValue
                                and ':' not in sValue and ',' not in sValue):
                            result = '{"$eq":' + '"' + sValue + '"' + '}'
                        else:
                            result = sValue
                    NTRC.tracef(
                        3, "FMT", "proc FormatQuery notjson item "
                        "key|%s| val|%s| result|%s|" %
                        (sAttrib, sValue, result))
            NTRC.tracef(
                3, "FMT", "proc FormatQuery item key|%s| val|%s| result|%s|" %
                (sAttrib, sValue, result))
            # Can't process dicts thru json twice.
            if isinstance(result, dict):
                dOut[sAttrib] = sValue
            else:
                dOut[sAttrib] = result

        # Allow only attribs that appear in the database, else will get
        #  no results due to implied AND of all items in query dict.
        dOutSafe = {k: v for k, v in dOut.items() if k in myg.lSearchables}
        dOutNotNone = {k: v for k, v in dOutSafe.items() if v is not None}
        NTRC.ntracef(
            3, "FMT", "proc dict b4|%s| \nsafe|%s|\nclean|%s|" %
            (dOut, dOutSafe, dOutNotNone))
        if "sQuery" in dOutNotNone.keys():
            # If the brave user has supplied a full, standalone query string,
            #  add its contents to the query dict so far.
            dTmp = dOutNotNone["sQuery"]
            del dOutNotNone["sQuery"]
            dOutNotNone.update(dTmp)
        return dOutNotNone
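    # Sketch of the transformation above with hypothetical values: given
    #  mydCli = {"nCopies": "5", "sShelf": "shelf3", "sJunk": None}
    #  and both "nCopies" and "sShelf" in myg.lSearchables, the result is
    #  {"nCopies": 5, "sShelf": '{"$eq":"shelf3"}'}: "5" parses as a json
    #  int, the naked string is wrapped in an $eq clause, and None-valued
    #  or non-searchable attributes are dropped.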
    def mScheduleGlitch(self):
        '''Wait for a glitch lifetime on this shelf.
        If the shelf died as a result of the glitch, stop
        rescheduling.  
        '''
        fNow = G.env.now
        NTRC.tracef(
            3, "LIFE", "proc schedule glitch t|%d| shelf|%s| alive|%s|" %
            (fNow, self.sShelfID, self.cShelf.mbIsShelfAlive()))
        while 1:
            fNow = G.env.now
            bAlive = self.cShelf.mbIsShelfAlive()
            if bAlive:
                self.fShelfLife = self.mfCalcCurrentGlitchLifetime(fNow)
                if self.fShelfLife > 0 and bAlive:
                    self.fShelfInterval = util.makeexpo(self.fShelfLife)
                    lg.logInfo(
                        "LIFETIME", "schedule  t|%6.0f| for shelf|%s| "
                        "interval|%.3f| freq|%d| life|%.3f|" %
                        (fNow, self.sShelfID, self.fShelfInterval,
                         self.nGlitchFreq, self.fShelfLife))
                    NTRC.tracef(
                        3, "LIFE", "proc schedule glitch shelf|%s| "
                        "interval|%.3f| based on life|%.3f| alive|%s| "
                        "waiting..." % (self.sShelfID, self.fShelfInterval,
                                        self.fShelfLife, bAlive))
                    yield G.env.timeout(self.fShelfInterval)

                    # ****** Glitch has now occurred. ******
                    # If correlated failure, step entirely outside the
                    #  Lifetime-Shelf-Server context to signal several servers.
                    if self.nGlitchSpan > 1:
                        from server import CServer
                        CServer.fnCorrFailHappensToAll(self.nGlitchSpan)
                    else:
                        self.mGlitchHappensNow()
                else:
                    NTRC.ntracef(
                        3, "LIFE", "proc glitch no freq or not alive, "
                        "set wait to infinity shelf|%s| freq|%d| life|%.3f| "
                        "interval|%.3f|" %
                        (self.sShelfID, self.nGlitchFreq, self.fShelfLife,
                         self.fShelfInterval))
                    yield G.env.timeout(G.fInfinity)
            else:
                break  # Exit the otherwise-infinite "while 1" loop.
        # When shelf is not alive anymore, wait forever
        NTRC.ntracef(
            3, "LIFE", "proc glitch shelf no longer alive, set wait "
            "to infinity shelf|%s| freq|%d| life|%.3f| interval|%.3f|" %
            (self.sShelfID, self.nGlitchFreq, self.fShelfLife,
             self.fShelfInterval))
        yield G.env.timeout(G.fInfinity)
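    # Hedged stand-in for util.makeexpo as used above (assumption: it draws
    #  an exponentially distributed interval with the given mean; the real
    #  util module is not shown in this excerpt):
    #   import random
    #   def makeexpo(fMean):
    #       return random.expovariate(1.0 / float(fMean))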
def fnldParseInput(mysFilename):
    ''' Return tuple containing
        - the output template string,
        - a list, one item per line, of dicts of column args from the
          csv that contain instructions for getting variable values
          from lines.
        Beware duck-type integers that become strings.

        Format of csv lines:
        varname,regex to find line,split word number,regex to strip out value

        instruction file format:

        ##becomes comment in output
        ###send out this string as header for the output, no hashes
        =template
        format string
        =variables
        varname,lineregex,wordnumber,valueregex (header)
        (lines of csv data)

    '''
    dParams = dict()
    with open(mysFilename, "r") as fhInfile:
        # Remove comments and blank lines.  Materialize the list, because
        #  it is scanned twice below.
        lLines = [sLine for sLine in fhInfile
                  if not re.match("^ *#[^#]", sLine)
                  and not re.match("^ *$", sLine.rstrip())]

        # Get the output template.  It may be longer than one line.
        lTemplate = fnlLinesInRange(lLines, "^=template", "^=variables")
        lTemplate = [sLine.rstrip().replace("###", "").replace("##", "#")
                     for sLine in lTemplate]
        NTRC.tracef(3, "INPT", "proc ParseInput template|%s|" % (lTemplate))

        # Fix the separator in the template according to the user spec.
        lAllTemplateNames = [lTemplateLine.split()
                             for lTemplateLine in lTemplate]
        lNewTemplate = [g.sSeparator.join(lTemplateNamesOneLine)
                        for lTemplateNamesOneLine in lAllTemplateNames]

        # Now get the CSV args into a dictionary of dictionaries.
        lVarLines = fnlLinesInRange(lLines, "^=variables",
                                    "^=thiswillnotbefound")
        lRowDicts = csv.DictReader(lVarLines)
        NTRC.tracef(5, "INPT", "proc ParseInput lRowDicts all|%s|"
            % (lRowDicts))

        dParams = {dRowDict["varname"]: dRowDict for dRowDict in lRowDicts}

    return (lNewTemplate, dParams)
    def mDestroyCopy(self, mysCopyID, mysDocID, mysShelfID):
        ''' Oops, a doc died, maybe just one or maybe the whole shelf.
        '''
        NTRC.tracef(
            3, "SERV", "proc mDestroyCopy remove copy|%s| doc|%s| "
            "from shelf|%s|" % (mysCopyID, mysDocID, mysShelfID))
        # Inform the client that the copy is gonzo.
        cClient = G.dID2Client[self.dDocIDs[mysDocID]]
        cClient.mDestroyCopy(mysDocID, self.ID, mysCopyID)
        # Clear out local traces of the doc and copy.
        self.lDocIDs.remove(mysDocID)
        del self.dDocIDs[mysDocID]
        # The Shelf will nuke the copy, because it created it.
        return self.ID + "-" + mysDocID

    def mInjectError(self, mynReduction, mynDecayHalflife, mynGlitchMaxlife):
        '''\
        When a glitch occurs, decrease lifetime by some percentage.
        The decrease decays exponentially at some rate until negligible.
        '''
        self.nReductionPercentage = mynReduction
        self.fDecayHalflife = float(mynDecayHalflife)
        self.fDecayRate = self.fLn2 / self.fDecayHalflife
        self.fMaxlife = float(mynGlitchMaxlife)
        NTRC.tracef(
            3, "LIFE", "proc inject reduct|%s| decayhalflife|%s| "
            "decayrate|%s| maxlife|%s|" %
            (self.nReductionPercentage, self.fDecayHalflife, self.fDecayRate,
             self.fMaxlife))
        return self.fDecayRate
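    # Worked example of the decay arithmetic above: with
    #  mynDecayHalflife = 100, fDecayRate = ln(2)/100 ~= 0.00693, so an
    #  initial reduction R0 decays as R(t) = R0 * exp(-0.00693*t), halving
    #  every 100 time units (fMaxlife presumably caps the glitch effect).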
def fntDoesLineMatchThisVar(mysLine, mynLineNr, mysVarname):
    '''\
    Check line against lineregex of var.
    Return tuple (matchobject, line, varname).
    '''
    dVar = g.dVars[mysVarname]
    sLineregex = dVar["lineregex"]
    oMatch = re.search(sLineregex, mysLine)
    NTRC.tracef(
        5, "MTLN",
        "proc MatchLine try regex|%s| var|%s| nr|%s| line|%s| match|%s|" %
        (sLineregex, mysVarname, mynLineNr, mysLine, oMatch))
    if oMatch:
        NTRC.tracef(
            3, "LINE", "proc MatchLine found line|%s|=|%s| var|%s| regex|%s|" %
            (mynLineNr, mysLine, mysVarname, sLineregex))
    return (oMatch, mysLine, mysVarname)
    def mAddDocument(self, mysDocID, mysClientID):
        ''' Find a shelf with room for the doc, or create one.
            Put the doc on the shelf, decrement the remaining space.
        '''
        # If the server is already dead, do not accept any documents.
        if not self.bDead:
            cDoc = G.dID2Document[mysDocID]
            nSize = cDoc.nSize
            # Find a shelf with sufficient empty space and place the doc there.
            cShelf = None
            for sShelfID in self.lShelfIDs:
                cShelf = G.dID2Shelf[sShelfID]
                bResult = cShelf.mAcceptDocument(mysDocID, nSize, mysClientID)
                if bResult:
                    break  # True = doc has been stored
                else:
                    continue  # False = no, try another shelf, if any
            else:  # If no more shelves, create another and use it.
                sNewShelfID = self.mCreateShelf()
                self.lShelfIDs.append(sNewShelfID)
                cShelf = G.dID2Shelf[sNewShelfID]
                sShelfID = cShelf.ID  # TODO: #Why not just use sNewShelfID?
                result = cShelf.mAcceptDocument(mysDocID, nSize, mysClientID)

            # Record that the doc has been stored on this server.
            self.lDocIDsComplete.append(mysDocID)
            self.bInUse = True
            self.lDocIDs.append(mysDocID)
            self.dDocIDs[mysDocID] = mysClientID
            NTRC.tracef(
                3, "SERV", "proc mAddDocument serv|%s| id|%s| "
                "docid|%s| size|%s| assigned to shelfid|%s| remaining|%s|" %
                (self.sName, self.ID, mysDocID, cDoc.nSize, sShelfID,
                 cShelf.nFreeSpace))

            return self.ID + "+" + sShelfID + "+" + mysDocID
        else:
            NTRC.ntracef(
                3, "SERV", "proc mAddDocument1 dead server|%s| do not "
                "add doc|%s| for client|%s|" %
                (self.ID, mysDocID, mysClientID))
            return False
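    # Note on the for/else idiom in mAddDocument above: the else clause of
    #  a for loop runs only when the loop finishes without a break, i.e.,
    #  only when no existing shelf accepted the document, so a new shelf is
    #  created in exactly that case.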
Example #14
    def mAddDocument(self, mysDocID, mysClientID):
        ''' Add a document to this shelf and record some information
            in the document itself.
        '''
        self.lDocIDs.append(mysDocID)
        self.lDocIDsComplete.append(mysDocID)
        self.lClientIDs.append(mysClientID)
        cDoc = G.dID2Document[mysDocID]
        nSize = cDoc.nSize

        # Make a copy of the document and shelve that.
        cCopy = CCopy(mysDocID, mysClientID, self.sServerID)
        sCopyID = cCopy.ID
        NTRC.tracef(
            3, "SHLF", "proc mAddDocument made copy|%s| of doc|%s| "
            "from client|%s|" % (sCopyID, mysDocID, mysClientID))

        # Where does document go on this shelf.  Closed interval [Begin,End].
        #        nBlkBegin = self.nCapacity - self.nFreeSpace
        # BZZZT: Never reuse space.  Any empty space in the area that
        # *used* to be occupied by documents has already been damaged
        # and destroyed a document.  Do not reuse the space.
        # Yeah, I know it's all just hypothetical, but why not.
        nBlkBegin = self.nHiWater + 1
        self.nFreeSpace -= nSize
        nBlkEnd = nBlkBegin + nSize - 1
        if nBlkEnd > self.nHiWater:
            self.nHiWater = nBlkEnd  # Last block used.
#        sShelfID = self.ID
#        sServerID = self.sServerID
        cCopy.mShelveCopy(self.sServerID, self.ID, nBlkBegin, nBlkEnd)
        self.lCopyIDs.append(sCopyID)
        self.lCopyIDsComplete.append(sCopyID)
        self.lCopyTops.append(nBlkEnd)

        cDoc.mCopyPlacedOnServer(sCopyID, self.sServerID)
        NTRC.tracef(
            5, "SHLF", "proc mAddDocument add doc|%s| to shelf|%s| "
            "size|%d| remaining|%d|" %
            (mysDocID, self.ID, nSize, self.nFreeSpace))

        return self.sServerID + "+" + self.ID + "+" + mysDocID + "+" + sCopyID
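    # Worked example of the placement arithmetic above (hypothetical
    #  numbers): if nHiWater is 99 and the incoming doc has nSize 10, the
    #  copy occupies the closed block interval [100,109] and nHiWater
    #  becomes 109.  Space below the high-water mark is never reused, so
    #  nFreeSpace shrinks even while damaged regions below sit empty.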
    def mAuditCollection(self, mynCycleInterval, mynSegments, mysCollectionID, 
            myeCallerSyncEvent):
        '''\
        SimPy generator to audit an entire collection.
        Divide the collection into segments and schedule audits
        for each segment in turn.
        '''
        fTimeCycleBegin = G.env.now
        lg.logInfo("AUDIT2","begin colln t|%10.3f| auditid|%s| cycle|%s| cli|%s| coll|%s|" % (G.env.now,self.ID,self.nNumberOfCycles,self.sClientID,self.sCollectionID))

        for iThisSegment in range(mynSegments):
            tSegmentStartTime = G.env.now
            nSegmentInterval = self.mCalcSegmentInterval(mynCycleInterval, 
                mynSegments)
            bLastSegment = (iThisSegment == mynSegments-1)

            self.lDocsThisSegment = self.mIdentifySegment(mysCollectionID, 
                mynSegments, iThisSegment)
            eSyncEvent = G.env.event()
            G.env.process(
                self.mAuditSegment(iThisSegment, self.lDocsThisSegment, 
                mysCollectionID, eSyncEvent))
            # Wait for completion of segment and its allotted time.
            yield eSyncEvent
            tNextSegmentStartTime = tSegmentStartTime + nSegmentInterval
            NTRC.tracef(3, "AUD2", "proc AuditCollection1 now|%s| tstart|%s| "
                "tnext|%s| tinterval|%s| blastseg|%s|" 
                % (G.env.now, tSegmentStartTime, tNextSegmentStartTime, 
                nSegmentInterval, bLastSegment))
            yield G.env.timeout(tNextSegmentStartTime - G.env.now)
        
        fTimeCycleEnd = G.env.now
        self.fTimeCycleLength = fTimeCycleEnd - fTimeCycleBegin
        lg.logInfo("AUDIT2", "end colln   t|%10.3f| auditid|%s| cycle|%s| "
            "cli|%s| coll|%s| repairs|%d| total|%d| perms|%d| "
            "majority|%s| minority|%d| duration|%9.3f|" 
            % (G.env.now, self.ID, self.nNumberOfCycles, self.sClientID, 
            self.sCollectionID, self.nRepairsThisCycle, self.nRepairsTotal, 
            self.nPermanentLosses, self.nRepairsMajority, 
            self.nRepairsMinority, self.fTimeCycleLength))
        # Tell the caller that we finished.
        myeCallerSyncEvent.succeed(value=self.nNumberOfCycles)
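    # Sketch of the segment scheduling above (assumption: the name and use
    #  of mCalcSegmentInterval suggest an even division of the cycle): with
    #  mynCycleInterval = 1000 and mynSegments = 4, each segment starts 250
    #  time units after the previous one, the final timeout absorbing
    #  whatever time the segment audit itself did not consume.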
    def mGlitchHappens(self, myfNow):
        self.bGlitchActive = True
        self.nGlitches += 1
        G.nGlitchesTotal += 1
        lg.logInfo(
            "LIFETIME", "glitch    t|%6.0f|  on shelf|%s| num|%s| "
            "impactpct|%d| decayhalflife|%d| span|%d| maxlife|%d| gtotal|%s|" %
            (myfNow, self.sShelfID, self.nGlitches, self.nImpactReductionPct,
             self.nGlitchDecayHalflife, self.nGlitchSpan, self.nGlitchMaxlife,
             G.nGlitchesTotal))
        self.fGlitchBegin = float(G.env.now)
        NTRC.tracef(
            3, "LIFE", "proc happens1 t|%.3f| shelf|%s| num|%s| impact|%d| "
            "decayhalflife|%d| span|%d| maxlife|%d|" %
            (myfNow, self.sShelfID, self.nGlitches, self.nImpactReductionPct,
             self.nGlitchDecayHalflife, self.nGlitchSpan, self.nGlitchMaxlife))
        ''' If this is a 100% glitch:
            - Declare server, not just shelf, to be dead.
            - Auditor will eventually discover the problem and
               call client to inform that server is dead.
        '''
        sServerID = self.cShelf.sServerID
        if G.dID2Server[sServerID].bDead or self.nImpactReductionPct == 100:
            self.cShelf.bAlive = False
            cServer = G.dID2Server[sServerID]
            NTRC.ntracef(
                3, "LIFE", "proc happens2 glitch 100pct or server dead "
                "id|%s| shelf|%s| svr|%s|" %
                (self.ID, self.cShelf.ID, sServerID))
            cServer.mServerDies()
            NTRC.ntracef(
                3, "LIFE", "proc happens3 life|%s| killed server |%s|" %
                (self.ID, sServerID))
            lg.logInfo(
                "LIFETIME", "100pct glitch on shelf |%s| "
                "of server|%s| - all docs lost" % (self.sShelfID, sServerID))
        else:
            self.mInjectError(self.nImpactReductionPct,
                              self.nGlitchDecayHalflife, self.nGlitchMaxlife)
        return (self.nGlitches, self.sShelfID)
Example #19
def main(mysInputFilename):
    # Create output template.
    lTemplate = map(lambda field: ("{" + field + "}"), g.lCoreColumns)
    sTemplate = " ".join(lTemplate)

    # Process file.
    with open(mysInputFilename, "r") as fhIn:
        nErrors = 0
        lErrors = []
        oReader = csv.reader(fhIn, delimiter=g.sSeparator)

        # First line better be the header.
        lHeader = next(oReader)
        NTRC.tracef(3, "NARO", "proc lHeader|%s|" % (lHeader))

        # For each data line, create dict of values and map them into
        #  the reduced-width output template.
        print(g.sCoreColumns)
        nLine = 1  # Count the header line as 1.
        for lValues in oReader:
            NTRC.tracef(3, "NARO", "proc lValues|%s|" % (lValues))
            dValues = dict(zip(lHeader, lValues))
            NTRC.tracef(3, "NARO", "proc dValues|%s|" % (dValues))
            sOut = sTemplate.format(**dValues)
            nLine += 1
            print(sOut)
            if "nolinefound" in sOut:
                nErrors += 1
                lErrors.append(nLine)
    if nErrors > 0:
        print("#ERROR - MISSING DATA nolinefound at %s" % (lErrors))
Example #21
    def mAge_shelf(self, mynLifeParam):
        ''' An entire shelf fails.  Remove all the docs it contained.
            Eventually, this will trigger a repair event and make the 
            collection more vulnerable during the repair.  
        '''
        fShelfLife = util.makeexpo(mynLifeParam)
        lg.logInfo(
            "SERVER", "mAge_shelf set lifetime time|%6.0f| shelf|%s| "
            "next lifetime|%.3f|khr" % (G.env.now, self.ID, fShelfLife))
        NTRC.tracef(
            3, "SHLF", "proc mAge_shelf  time|%6.0f| shelf|%s| "
            "next lifetime|%.3f|khr" % (G.env.now, self.ID, fShelfLife))
        yield G.env.timeout(fShelfLife)

        # S H E L F  F A I L S
        G.nTimeLastEvent = G.env.now
        self.bAlive = False  # Shelf can no longer be used to store docs.
        NTRC.tracef(
            3, "SHLF", "proc mAge_shelf  time|%d| shelf|%s| shelf_error" %
            (G.env.now, self.ID))
        lg.logInfo(
            "SERVER", "storage shelf failed time|%6.0f| server|%s| "
            "shelf|%s| lost |%d| docs" %
            (G.env.now, self.sServerID, self.ID, len(self.lCopyIDs)))
        # This whole shelf is a goner.  Kill it.
        NTRC.tracef(
            5, "SHLF", "proc mAge_shelf kill contents ldocs|%s| "
            "lcopies|%s|" % (self.lDocIDs, self.lCopyIDs))
        # Note that we have to copy the list before modifying it and
        # iterate over the copy of the list.
        # Standard problem with updating an iterable inside the for loop.
        templCopyIDs = copy.deepcopy(self.lCopyIDs)
        for sCopyID in templCopyIDs:
            sDocID = G.dID2Copy[sCopyID].sDocID
            self.mDestroyCopy(sCopyID)
            #            G.dID2Server[self.sServerID].mDestroyDocument(sDocID,self.ID)
            G.dID2Server[self.sServerID].mDestroyCopy(sCopyID, sDocID, self.ID)
            self.mReportDocumentLost(sDocID)
        NTRC.tracef(
            3, "FAIL", "proc t|%d| shelf failure server|%s| qual|%d| "
            "shelf|%s| docs|%d|" %
            (G.env.now, self.sServerID, G.dID2Server[self.sServerID].nQual,
             self.ID, len(templCopyIDs)))
Example #23
def logSetConfig(mysLogLevel, mysLogFile):
    lLogLevels = 'NOTSET CRITICAL ERROR WARNING INFO DEBUG'.split()
    sLogLevel = mysLogLevel.upper()
    if sLogLevel not in lLogLevels:
        NTRC.tracef(0, "LGOU",
                    "ERROR unrecognized logging level|%s|" % (mysLogLevel))
        sLogLevel = "NOTSET"

    # Set the logging level for this session.
    NTRC.tracef(3, "LGOU", "proc sLogLevel|%s|" % (sLogLevel))
    logger.setLevel(sLogLevel.upper())
    ''' Set the output file for logging.
        Either to a filename in LOG_FILE param or environ variable, 
        or to the console using StreamHandler.
    '''
    if (mysLogFile not in ("", " ", "-")
            and mysLogFile.upper() not in ("NONE", "CONSOLE", "STDOUT")):
        channel = logging.FileHandler(mysLogFile)
    else:
        channel = logging.StreamHandler()
    NTRC.tracef(3, "LGOU",
                "proc set log handler mysLogFile|%s|" % (mysLogFile))
    ''' Adjust the format of log output to match the time stamps
        we have used in TRACE forever.  
    '''
    # Create formatter instance.
    formatter = logging.Formatter(
        fmt='%(asctime)s %(name)s %(levelname)s - %(message)s',
        datefmt='%Y%m%d_%H%M%S')
    # Add formatter to the output channel.
    channel.setFormatter(formatter)
    # Finally, add the channel handler to the logger.
    logger.addHandler(channel)

    return logger
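# Usage sketch (assumption: `logger` is created at module level, e.g. via
#  logging.getLogger(), since logSetConfig configures and returns it):
#   log = logSetConfig("INFO", "CONSOLE")  # handler goes to the console
#   log.info("audit cycle complete")       # stamped like 20260101_120000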
def main():
    '''
    Process:
    - Parse the CLI command into g.various data items.
    - Validate user-supplied directories; get environment variables.
    - Query the searchspace for the stream of instructions
    - For each instruction from database selection, get dict for line
    - Using dict args, construct plausible command lines, into file
    - Check to see that there aren't too many similar processes 
      already running; if too many, then wait.
    - Launch ListActor process to execute commands.
    - Wait a polite interval before launching another.
    '''
    NTRC.ntracef(0, "MAIN", "Begin.")
    NTRC.ntracef(0, "MAIN", "TRACE  traceproduction|%s|" % NTRC.isProduction())

    def fnbQEnd():
        return g.bLast

    sBrokerCommand = fnsReconstituteCommand(sys.argv)
    fnbMaybeLogCommand(sBrokerCommand)
    NTRC.ntracef(0, "MAIN", "command=|%s|" % (sBrokerCommand.rstrip()))

    # Get args from CLI and put them into the global data
    dCliDict = brokercli.fndCliParse("")
    # Carefully insert any new CLI values into the Global object.
    dCliDictClean = {
        k: util.fnIntPlease(v)
        for k, v in dCliDict.items() if v is not None
    }
    g.__dict__.update(dCliDictClean)

    # Validate that the user-specified directories exist.
    if not fnbValidateDir(g.sFamilyDir):
        raise ValueError("FamilyDir \"%s\" not found" % (g.sFamilyDir))
    if not fnbValidateDir("%s/%s" % (g.sFamilyDir, g.sSpecificDir)):
        raise ValueError("SpecificDir \"%s\" not found" % (g.sSpecificDir))

    # Get command templates from external file.
    fnGetCommandTemplates(g.sCommandListFilename)

    # Construct database query for this invocation.
    g.cFmt = brokerformat.CFormat()
    dQuery = g.cFmt.fndFormatQuery(dCliDict, g)

    # Look for overriding environment variables
    fnvGetEnvironmentOverrides()

    # Open the database to keep "done" records,
    #  and delete moldy, old in-progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(
        g.sSearchDbMongoName, g.sSearchDbProgressCollectionName,
        g.sSearchDbDoneCollectionName)
    g.mdb.fnvDeleteProgressCollection()

    # Get the set of instructions for today from database.
    NTRC.tracef(
        0, "MAIN",
        "proc querydict2|%s|" % (list(util.fngSortDictItemsByKeys(dQuery))))
    itAllInstructions = searchspace.fndgGetSearchSpace(g.sInsDir, g.sInsTyp,
                                                       dQuery)

    # Start the start-end threads.
    nb.fntRunEverything(g, g.qInstructions, fnbQEnd, g.nCoreTimer,
                        g.nStuckLimit)

    # If this wasn't just a listonly run, do all the cases.
    if not g.sListOnly.startswith("Y"):
        NTRC.ntracef(3, "MAIN", "proc all instr|%s|" % (g.lGiantInstr))
    else:
        NTRC.ntracef(0, "MAIN", "Listonly.")
    nRuns = fnnProcessAllInstructions(itAllInstructions)
    NTRC.ntracef(0, "MAIN", "End queued all runs ncases|%s|" % (g.nCases, ))
def fnldParseInput(mysFilename):
    ''' Return tuple containing
        - the output template string, 
        - a list, one item per line, of dicts of column args from the 
          csv that contain instructions for getting variable values
          from lines.  
        Beware duck-type integers that become strings.

        Format of csv lines:        
        varname,regex to find line,split word number,regex to strip out value

        instruction file format:

        ##becomes comment in output
        ###send out this string as header for the output, no hashes
        =template
        format string
        =variables
        varname,lineregex,wordnumber,valueregex (header)
        (lines of csv data)

    '''
    dParams = dict()
    with open(mysFilename, "r", encoding="'utf-8") as fhInfile:
        # Remove comments.
        lLines = list(
            filter(
                lambda sLine: not re.match("^ *#[^#]", sLine) and not re.match(
                    "^ *$", sLine.rstrip()), fhInfile))

        # Get the output template.  It may be longer than one line.
        lTemplate = fnlLinesInRange(lLines, "^=template", "^=variables")
        lTemplate = list(
            map(
                lambda sLine: sLine.rstrip().replace("###", "").replace(
                    "##", "#"), lTemplate))
        NTRC.tracef(3, "INPT", "proc ParseInput1 template|%s|" % (lTemplate))

        # Fix the separator in the template according to the user spec.
        lAllTemplateNames = [
            lTemplateLine.split() for lTemplateLine in lTemplate
        ]
        lNewTemplate = [
            g.sSeparator.join(lTemplateNamesOneLine)
            for lTemplateNamesOneLine in lAllTemplateNames
        ]
        NTRC.tracef(3, "INPT",
                    "proc ParseInput2 template|%s|" % (lNewTemplate))

        # Now get the CSV args into a dictionary of dictionaries.
        lVarLines = fnlLinesInRange(lLines, "^=variables",
                                    "^=thiswillnotbefound")
        lRowDicts = csv.DictReader(lVarLines)
        NTRC.tracef(5, "INPT",
                    "proc ParseInput3 lRowDicts all|%s|" % (lRowDicts))

        dParams = {dRowDict["varname"]: dRowDict for dRowDict in lRowDicts}

    return (lNewTemplate, dParams)
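# Hypothetical instruction file accepted by fnldParseInput, following the
#  docstring and the regex anchors in the code:
#   =template
#   ###seed losses
#   seed losses
#   =variables
#   varname,lineregex,wordnumber,valueregex
#   seed,^Seed,2,(\d+)
#   losses,^Lost docs,3,(\d+)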
Example #27
    def mAge_sector(self):
        ''' A sector in the shelf fails.  This corrupts a document.
            For the moment, assume that it destroys the document.  
            Eventually, it will have a probability of destroying the 
            document depending on the portion of the document 
            corrupted and the sensitivity of the document to corruption
            (e.g., compressed or encrypted), or the failure hits an
            encryption or license key.  
        '''
        # If the shelf has been emptied by a shelf failure, stop 
        # caring about sector failures.
        while self.bAlive:
            # Sector lifetime depends on shelf lifetime and glitch age.
            fNow = G.env.now
            cLifetime = G.dID2Lifetime[self.sSectorLifetimeID]
            fLifetimeNow = cLifetime.mfCalcCurrentSectorLifetime(fNow)
            fSectorLifeInterval = util.makeexpo(fLifetimeNow)
            NTRC.tracef(3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
                "next interval|%.3f|hr from life rate|%.3f|hr" 
                % (G.env.now, self.ID, fSectorLifeInterval, fLifetimeNow))
            yield G.env.timeout(fSectorLifeInterval)

            # S E C T O R  E R R O R
            self.nSectorHits += 1
            G.nTimeLastEvent = G.env.now
            NTRC.tracef(3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
                "Sector_error hits|%d| emptyhits|%d|" 
                % (G.env.now, self.ID, self.nSectorHits, self.nEmptySectorHits))

            # Select a victim Document, probability proportional to size.
            # Small error, size=1.  What doc dies as a result?
            sCopyVictimID = self.mSelectVictimCopy(mynErrorSize=1)

            # New version: compress strings of consecutive misses into single line.
            # Normally we log one line per error regardless of whether it hits or 
            # misses a document.  That results in hideously long log files for 
            # sparse storage structures, like small docs on large shelf. 
            # Count consecutive misses, and issue one summary line before the 
            # next hit.
            # CANDIDATE FOR REFACTORING
            if sCopyVictimID:               # Hidden error in victim doc.
                # Destroy copy on this shelf.
                cCopy = G.dID2Copy[sCopyVictimID]
                sDocID = cCopy.mGetDocID()
                self.mDestroyCopy(sCopyVictimID)
                # Log the summary line if we just ended a string of misses
                if self.nConsecutiveMisses > 0:
                    lg.logInfo("SERVER", "small error t|%6.0f| svr|%s| "
                        "shelf|%s| consecutive misses|%d|" 
                        % (G.env.now, self.sServerID, self.ID, 
                        self.nConsecutiveMisses))
                self.nConsecutiveMisses = 0
                lg.logInfo("SERVER", "small error t|%6.0f| svr|%s| "
                    "shelf|%s| hidden failure in copy|%s| doc|%s|" 
                    % (G.env.now,self.sServerID,self.ID,sCopyVictimID,sDocID))
                NTRC.tracef(3, "FAIL", "proc t|%d| sector failure server|%s| "
                    "qual|%d| shelf|%s| doc|%s| copy|%s|" 
                    % (G.env.now, self.sServerID, 
                    G.dID2Server[self.sServerID].nQual, self.ID, sDocID, 
                    sCopyVictimID))
            else:                           # No victim, hit empty space.
                self.nEmptySectorHits += 1
                NTRC.tracef(3, "SHLF", "proc mAge_sector shelf|%s| "
                    "sector error fell in empty space" 
                    % (self.ID))
                if self.nConsecutiveMisses == 0:
                    lg.logInfo("SERVER", "small error t|%6.0f| svr|%s| "
                        "shelf|%s| hidden failure in copy|%s|" 
                        % (G.env.now, self.sServerID, self.ID, sCopyVictimID))
                self.nConsecutiveMisses += 1
                NTRC.tracef(3, "FAIL", "proc t|%d| sector failure server|%s| "
                    "qual|%d| shelf|%s| copy|%s|" 
                    % (G.env.now, self.sServerID, 
                    G.dID2Server[self.sServerID].nQual, self.ID, sCopyVictimID))
            # Initiate a repair of the dead document.
            # BZZZT NYI: currently all such failures are silent, so they are 
            #  not detected by the client until audited (or end of run).  
        # Shelf is no longer alive, so we do not notice or schedule 
        #  future sector errors.  Log the event.  
        lg.logInfo("SHELF ", "t|%6.0f| dead shelf|%s| of svr|%s|, "
            "no future errors" 
            % (G.env.now, self.ID, self.sServerID))
Example #28
    def mSelectVictimCopy(self, mynErrorSize):
        ''' Which doc copy on this shelf, if any, was hit by this error?
            Throw a uniform dart at all the docs on the shelf, see 
            which one gets hit, or dart falls into empty space.  Doc size counts.  
        '''
        nRandomSpot = util.makeunif(1, self.nCapacity + mynErrorSize - 1)
        nLoc = 0
        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy0 wherehit spot|%s| "
            "hiwater|%s|  shelfid|%s| capacity|%s|" 
            % (nRandomSpot,self.nHiWater,self.ID,self.nCapacity))
        # First, check to see if the failure is maybe in an occupied region.  
        if nRandomSpot <= self.nHiWater:
            # Find the document hit by the error.  May have been hit before, too.  
            # New version, vanilla binary search with adjacent interval checking
            #  on list of all locations assigned on this shelf.
            # After you find the location, check to see that it 
            #  is still occupied by live copy.  
            nLen = len(self.lCopyIDsComplete)
            # Floor division: these values feed list indexes, and Python 3
            #  true division would produce floats here.
            nDist = (nLen + 1) // 2
            nLoc = nDist
            NTRC.tracef(5, "SHLF", "proc SelectVictimCopy0 searchsetup len|%s| "
                "loc|%s| dist|%s|"
                % (nLen, nLoc, nDist))
            while 1:
                if nLoc <= 0: nLoc = 1
                if nLoc >= nLen: nLoc = nLen - 1
                nDist = (nDist + 1) // 2
                if nDist == 0: nDist = 1

                nTop = self.lCopyTops[nLoc]
                nBottom = self.lCopyTops[nLoc-1]
                sCopyID = self.lCopyIDsComplete[nLoc-1]
                sDocID = self.lDocIDsComplete[nLoc-1]
                cCopy = G.dID2Copy[sCopyID]

                if nRandomSpot <= nTop:
                    # Lower than top, look down.
                    if nRandomSpot >= nBottom:
                        # Found to left of nLoc.  
                        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy5D "
                            "found victim id|%s| at spot|%s| in[%s,%s]| " 
                            "doc|%s|" 
                            % (sCopyID, nRandomSpot, nBottom, nTop, sDocID))
                        # Is this slot still occupied by a live copy?
                        if sCopyID in self.lCopyIDs:
                            sVictimID = sCopyID
                            NTRC.tracef(3, "SHLF", "proc mSelectVictimCopy "
                                "NEWD end shelf|%s| spot|%d| hits doc|%s| "
                                "placed[%d,%d] size|%d| outof|%d|" 
                                % (self.ID, nRandomSpot, sVictimID, 
                                cCopy.nBlkBegin, cCopy.nBlkEnd, 
                                (cCopy.nBlkEnd-cCopy.nBlkBegin+1), 
                                self.nCapacity))
                        else:
                            sVictimID = None
                            NTRC.tracef(5, "SHLF", "proc SelectVictimCopy2D "
                                "no longer valid copyid|%s| docid|%s|" 
                                % (sCopyID, sDocID))
                            self.nMultipleHits += 1
                        break
                    else:
                        nLoc -= nDist
                        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy3D "
                            "down spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|" 
                            % (nRandomSpot, nBottom, nTop, nLoc, nDist))
                else:
                    # Higher than top, look up.
                    if nRandomSpot <= self.lCopyTops[nLoc+1]:
                        # Found to right of nLoc.
                        # Reevaluate ids and locations to the next slot 
                        #  on the right.  
                        sCopyID = self.lCopyIDsComplete[nLoc+1-1]
                        sDocID = self.lDocIDsComplete[nLoc+1-1]
                        cCopy = G.dID2Copy[sCopyID]
                        nBottom = self.lCopyTops[nLoc+1-1]
                        sCopyID = self.lCopyIDsComplete[nLoc+1-1]
                        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy5U "
                            "found victim id|%s| at spot|%s| in[%s,%s]| doc|%s|" 
                            % (sCopyID, nRandomSpot, nBottom, nTop, sDocID))
                        # Is this slot still occupied by a live copy?
                        if sCopyID in self.lCopyIDs:
                            sVictimID = sCopyID
                            NTRC.tracef(3, "SHLF", "proc mSelectVictimCopy NEWU "
                                "end shelf|%s| spot|%d| hits doc|%s| "
                                "placed[%d,%d] size|%d| outof|%d|" 
                                % (self.ID, nRandomSpot, sVictimID, 
                                cCopy.nBlkBegin, cCopy.nBlkEnd, 
                                (cCopy.nBlkEnd-cCopy.nBlkBegin+1), 
                                self.nCapacity))
                        else:
                            sVictimID = None
                            NTRC.tracef(5, "SHLF", "proc SelectVictimCopy2U "
                                "no longer valid copyid|%s| docid|%s|" 
                                % (sCopyID, sDocID))
                            self.nMultipleHits += 1
                        break
                    else:
                        nLoc += nDist
                        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy3U up   "
                            "spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|" 
                            % (nRandomSpot, nBottom, nTop, nLoc, nDist))

        else:   # Outside hiwater area, just count as a miss.
            NTRC.tracef(3, "SHLF", "proc mSelectVictimCopy shelf|%s| spot|%d| "
                "above hiwater|%s| empty" 
                % (self.ID, nRandomSpot, self.nHiWater))
            sVictimID = None
            self.nHitsAboveHiWater += 1
        return sVictimID
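    # Note on the dart throw above: nRandomSpot is uniform over the shelf,
    #  so the chance that a given live copy is hit by one error is roughly
    #  proportional to its size (about nSize/nCapacity), and darts landing
    #  above the high-water mark or in previously damaged gaps are misses.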
def main():
    '''
    Process:
    - Parse the CLI command into g.various data items.
    - Validate user-supplied directories; get environment variables.
    - Make queues to send instructions to pool of worker processes.
    - Create pool of worker processes.
    - Query the searchspace for the stream of instructions
    - For each instruction from database selection, get dict for line
    - Using dict args, construct plausible command lines, into file
    - For each instruction, expand to the number of samples (seeds) to use.
    - When we finally have a single instruction to execute, queue that
       to the worker jobs.
    - When all instructions have been queued, close down the worker processes.
    '''
    NTRC.ntracef(0, "MAIN", "Begin.")
    NTRC.ntracef(0, "MAIN", "TRACE  traceproduction|%s|" % NTRC.isProduction())

    sBrokerCommand = fnsReconstituteCommand(sys.argv)
    fnbMaybeLogCommand(sBrokerCommand)
    NTRC.ntracef(0, "MAIN", "command=|%s|" % (sBrokerCommand.rstrip()))

    # Get args from CLI and put them into the global data
    dCliDict = brokercli.fndCliParse("")
    # Carefully insert any new CLI values into the Global object.
    dCliDictClean = {
        k: util.fnIntPlease(v)
        for k, v in dCliDict.items() if v is not None
    }
    g.__dict__.update(dCliDictClean)

    # Validate that the user-specified directories exist.
    if not fnbValidateDir(g.sFamilyDir):
        raise ValueError("FamilyDir \"%s\" not found" % (g.sFamilyDir))
    if not fnbValidateDir("%s/%s" % (g.sFamilyDir, g.sSpecificDir)):
        raise ValueError("SpecificDir \"%s\" not found" % (g.sSpecificDir))

    # Get command templates from external file.
    fnGetCommandTemplates(g.sCommandListFilename)

    # Construct database query for this invocation.
    g.cFmt = brokerformat.CFormat()
    dQuery = g.cFmt.fndFormatQuery(dCliDict, g)

    # Look for overriding environment variables
    fnvGetEnvironmentOverrides()

    # Open the database to keep "done" records,
    #  and delete moldy, old in-progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(
        g.sSearchDbMongoName, g.sSearchDbProgressCollectionName,
        g.sSearchDbDoneCollectionName)
    g.mdb.fnvDeleteProgressCollection()

    # Get the set of instructions for today from database.
    NTRC.tracef(
        0, "MAIN",
        "proc querydict2|%s|" % (list(util.fngSortDictItemsByKeys(dQuery))))
    itAllInstructions = searchspace.fndgGetSearchSpace(g.sInsDir, g.sInsTyp,
                                                       dQuery)

    # Start the start-end threads.
    # Define queues.
    # Need a Multiprocessing Manager to own the output queue.  (Do we?) (Yes.)
    #  (A minimal sketch of this queue wiring follows this function.)
    mpmgr = mp.Manager()
    g.qJobs = mp.Queue()
    g.qOutput = mpmgr.Queue()
    # Start pool of worker processes.
    g.cWorkersInst = cworkers.CWorkers(nservers=g.nCores,
                                       qinputjobs=g.qJobs,
                                       qoutputdata=g.qOutput)

    # If this wasn't just a listonly run, do all the cases.
    if not g.sListOnly.startswith("Y"):
        NTRC.ntracef(3, "MAIN", "proc all instr|%s|" % (g.lGiantInstr))
    else:
        NTRC.ntracef(0, "MAIN", "Listonly.")
    nRuns = fnnProcessAllInstructions(itAllInstructions)
    NTRC.ntracef(0, "MAIN", "End queued all runs ncases|%s|" % (g.nCases, ))
    def mAuditSegment(self, mynThisSegment, mylDocs, mysCollectionID, 
            myeCallerSyncEvent):
        '''\
        SimPy generator to audit one segment of a collection.
        This does all the work.  
        This is the single worst, most confusing, most fragile, and 
         most awful code in the entire program.  Unfortunately, in 
         Python 2, one cannot yield from a vanilla function, only
         from a generator, so all that crap, and its convoluted 
         conditional logic, is in here.  
         *This* is the meanest, nastiest, ugliest father-raper of them all.
        '''

        lg.logInfo("AUDIT2", "begin segmt t|%10.3f| auditid|%s| cycle|%s| "
            "seg|%s| cli|%s| coll|%s| ndocs|%s|range %s-%s|" 
            % (G.env.now, self.ID, self.nNumberOfCycles, mynThisSegment, 
            self.sClientID, self.sCollectionID, len(mylDocs), 
            mylDocs[0], mylDocs[-1]))
    
        ###seize network resource
        # Seize the network resource so this audit cycle 
        # can use it exclusively.
        # The "with" should take care of releasing it
        cClient = G.dID2Client[self.sClientID]
        with cClient.NetworkBandwidthResource.request() as reqnetwork:
            fNetworkWaitBegin = G.env.now

            ###wait if necessary
            result = yield reqnetwork       # Wait for network to be free.
            fNetworkWaitEnd = G.env.now
            fNetworkWaitTime = fNetworkWaitEnd - fNetworkWaitBegin

            ###log result
            # Log event if we had to wait, or not, for the network to be free.  
            lg.logInfo("AUDIT2", "grabnetwork t|%10.3f| auditid|%s| cli|%s| "
                "coll|%s| seg|%s| delay|%9.3f|" 
                % (G.env.now, self.ID, self.sClientID, self.sCollectionID, 
                mynThisSegment, fNetworkWaitTime))
            # And restart the duration clock after the unproductive wait.
            fTimeCycleBegin = G.env.now
            # So much for timekeeping.  Now do some actual work.

            # P h a s e  0: Check to see if any servers have died of old age, 
            #  possibly from being weakened by shock.  If so, they get killed
            #  now so that this audit segment will discover the loss.  
            nResult = CShock.cmBeforeAudit()

            # P h a s e  1: Check servers for copies of docs, record losses.
            # Docs already permanently lost will not be put on the damaged list.
            self.dlDocsDamagedOnServers = cc.defaultdict(list)
            cCollection = G.dID2Collection[mysCollectionID]
            # foreach server used for this collection
            for sServerID in cCollection.lServerIDs:
                cServer = G.dID2Server[sServerID]
                ###foreach doc
                # foreach doc in this segment
                for sDocID in self.lDocsThisSegment:
                    cDoc = G.dID2Document[sDocID]
                    # If the doc is still on the server, retrieve it
                    #  and spend time doing that.
                    # If not, then record that doc damaged on this server. 
                    fTransferTime = self.mRetrieveDoc(sDocID,sServerID)
    
                    ###if okay
                    if fTransferTime:
                        NTRC.tracef(3, "AUD2", "proc AuditSegment3 retrieve "
                            "t|%10.3f| doc|%s| svr|%s| xfrtim|%f|" 
                            % (G.env.now, sDocID, sServerID, fTransferTime))
                        ###yield timeout
                        yield G.env.timeout(fTransferTime)
                    else:
                        if self.mIsDocumentLost(sDocID):
                            pass    # Do not complain if doc already known to be lost.
                        else:
                            # If copy is missing here, save server in 
                            #  lost-list for doc.
                            self.dlDocsDamagedOnServers[sDocID].append(sServerID)
                            NTRC.tracef(5, "AUD2", "proc AuditSegment2 doc|%s| "
                                "svr|%s| lost on|%s|" 
                                % (sDocID, sServerID, 
                                self.dlDocsDamagedOnServers[sDocID]))
                            ###log copy missing on some server
                            lg.logInfo("AUDIT2", "copymissing t|%10.3f| "
                                "doc|%s| svr|%s| aud|%s-c%s-s%s| cli|%s| "
                                "coll|%s|" 
                                % (G.env.now, sDocID, sServerID, self.ID, 
                                self.nNumberOfCycles, mynThisSegment, 
                                self.sClientID, self.sCollectionID))
                # end foreach doc
            # end foreach server used for collection

            '''NOTE: Phase 2 here can be factored out of this function entirely
                because it does not yield or otherwise molest the clock.
                But refactoring must be done carefully because it consumes
                and supplies data from/for phases 1 and 3.  
                (A factored sketch of the classification follows this function.)
            '''

            # P h a s e  2: Record severity (majority/minority/permanent) of copy losses.
            # NOTE: This arithmetic seems to be reasonable for all numbers
            #  greater than two, but one copy remaining out of two is judged 
            #  to be a majority, so a repair from that single remaining copy
            #  is labeled a majority repair.  Seems kinda wrong.  
            # Would love to split the logic of this routine into separate
            #  functions; when you're indented seven levels, your logic is,
            #  um, hard to explain.  But we cannot yield from sub-functions, 
            #  at least not in Python2.  
            nServers = len(cCollection.lServerIDs)
            nMajority = (nServers + 1) // 2     # Floor division truncates
                                                #  in Python 2 and 3 alike.
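            # Worked check (hedged): with 4 servers, (4+1)//2 = 2, so two
            #  surviving copies of four form a "majority"; with 2 servers,
            #  (2+1)//2 = 1, so a single survivor also counts as a
            #  "majority" -- the questionable edge case noted above.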

            ###foreach doc on damaged list
            for sDocID in sorted(self.dlDocsDamagedOnServers.keys(), 
                key=util.fniNumberFromID):

                ###count docs on all servers
                lDocLostOnServers = self.dlDocsDamagedOnServers[sDocID]
                nCopiesLost = len(lDocLostOnServers)
                nCopiesLeft = nServers - nCopiesLost
                # How many copies left: none, a lot, a few?
                NTRC.tracef(3, "AUD2", "proc AuditSegment1 doc|%s| nsvr|%s| "
                    "loston|%s| nleft|%s|" 
                    % (sDocID, nServers, lDocLostOnServers, nCopiesLeft))

                ###if doc not lost
                ###    assess majority/minority/lost
                if nCopiesLeft == 0:                    # N O N E  remain
                    # Report permanent loss, one ping only.
                    # Do not double-count docs already lost.  Doc will not
                    #  be put onto damaged list if already lost.
                    sRepair = "permloss"
                    lg.logInfo("AUDIT2", "perm loss   t|%10.3f| doc|%s| "
                        "aud|%s-c%s-s%s| cli|%s| coll|%s|" 
                        % (G.env.now, sDocID, self.ID, self.nNumberOfCycles, 
                        mynThisSegment, self.sClientID, self.sCollectionID))
                    self.mRecordDocumentLost(sDocID)
                else:
                    ###doc is repairable; determine majority/minority
                    if nCopiesLeft >= nMajority:      # M A J O R I T Y  remain
                        sRepair = "majority"
                    else:                             # M I N O R I T Y  remain
                        sRepair = "minority"
                    ###log repair type for doc
                    lg.logInfo("AUDIT2", "%s rp t|%10.3f| doc|%s| "
                        "aud|%s-c%s-s%s| cli|%s| coll|%s|" 
                        % (sRepair, G.env.now, sDocID, self.ID, 
                        self.nNumberOfCycles, mynThisSegment, self.sClientID, 
                        self.sCollectionID))

                # P h a s e  3: repair damaged docs, if possible.
                ###foreach server on which doc was damaged
                # Put a copy back on each server where it is missing.  
                for sServerID in lDocLostOnServers:
                    if nCopiesLeft > 0:
                        ###repair
                        fTransferTime = self.mRepairDoc(sDocID,sServerID)
                        '''\
                        If the repair returns False instead of a time, 
                        then that server is no longer accepting documents.
                        Remove that server from the list, invalidate all 
                        its copies.  Then tell the client to find a new 
                        server and re-place the entire collection.  
                        Schedule this notification to occur at the end of the
                        audit cycle or segment to avoid confusing the 
                        ongoing evaluation.  Auditor informs client: oops,
                        you seem to be missing a server; and client takes
                        corrective action at that time.  
                        Send collectionID and serverID to clientID.
                        '''
    
                        ###if not okay ie server dead
                        if fTransferTime is False:  # Identity test: a legal
                                                    #  0.0 time == False.
                            self.stDeadServerIDs.add((sServerID, 
                                self.sCollectionID))
                            lg.logInfo("AUDIT2", "dead server t|%10.3f| "
                                "doc|%s| aud|%s| cli|%s| coll|%s| svr|%s|" 
                                % (G.env.now, sDocID, self.ID, self.sClientID, 
                                self.sCollectionID, sServerID))
                        else:
                            ###log repair effected
                            NTRC.tracef(3, "AUD2", "proc AuditSegment4 repair "
                                "t|%10.3f| doc|%s| svr|%s| xfrtim|%f| type|%s|" 
                                % (G.env.now, sDocID, sServerID, fTransferTime, 
                                sRepair))
                            yield G.env.timeout(float(fTransferTime))
                            lg.logInfo("AUDIT2", "repair doc  t|%10.3f| "
                                "doc|%s| aud|%s| cli|%s| coll|%s| svr|%s| "
                                "from %s copies|%d|" 
                                % (G.env.now, sDocID, self.ID, self.sClientID, 
                                self.sCollectionID, sServerID, sRepair, 
                                nCopiesLeft))
    
                            ###count repair as type maj/min for audit and doc
                            # If repair succeeded, record and count it.
                            if sRepair == "majority":
                                self.mRecordDocumentMajorityRepair(sDocID)
                            else:
                                self.mRecordDocumentMinorityRepair(sDocID)
                # end foreach server that lost this doc
            # end foreach damaged doc

            lg.logInfo("AUDIT2", "end   segmt t|%10.3f| auditid|%s| "
                "cycle|%s| seg|%s| cli|%s| coll|%s| ndocs|%s|" 
                % (G.env.now, self.ID, self.nNumberOfCycles, mynThisSegment, 
                self.sClientID, self.sCollectionID, len(mylDocs)))
    
            # After all that, tell the caller we finished.
            myeCallerSyncEvent.succeed(value=mynThisSegment)
            lg.logInfo("AUDIT2", "rls network t|%10.3f| auditid|%s| "
                "cli|%s| coll|%s| seg|%s|" 
                % (G.env.now, self.ID, self.sClientID, self.sCollectionID, 
                mynThisSegment))
        # end network resource

        # If we saw any dead servers during this segment, inform the clients.
        for (sDeadServerID, sDeadCollectionID) in self.stDeadServerIDs:
            cCollection = G.dID2Collection[sDeadCollectionID]
            cClient = G.dID2Client[cCollection.sClientID]
            NTRC.ntracef(3, "AUD2", "proc t|%10.3f| inform dead server "
                "auditid|%s| cli|%s| coll|%s| svr|%s|" 
                % (G.env.now, self.ID, self.sClientID, sDeadCollectionID, 
                sDeadServerID))
            cClient.mServerIsDead(sDeadServerID, sDeadCollectionID)
        self.stDeadServerIDs = set()
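# A hedged sketch of Phase 2's majority/minority/permloss decision factored
#  into a plain function, as the NOTE in mAuditSegment wishes.  The names
#  here are illustrative, not part of the original module.
def fnsClassifyLoss(mynServers, mynCopiesLost):
    '''Return "permloss", "majority", or "minority" for one damaged doc.'''
    nCopiesLeft = mynServers - mynCopiesLost
    nMajority = (mynServers + 1) // 2   # Same truncating arithmetic as above.
    if nCopiesLeft == 0:
        return "permloss"
    return "majority" if nCopiesLeft >= nMajority else "minority"
# E.g., fnsClassifyLoss(4, 2) == "majority"; fnsClassifyLoss(2, 1) is also
#  "majority", reproducing the two-server oddity noted in the comments.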
def main():
    '''
    Process:
    Open the file given on the command line.
    Open the database given on the command line.
    Read the two lines from the file.
    If the sDoneId(=mongoid) already appears in the done collection 
     of the database, 
    Then    end.
    Else    dictionary-ify the data (maybe csvreader already did that for us).
            add the dict to the done collection, including the sDoneId field.
            end.
    '''
    NTRC.ntracef(0,"DCLN","datacleanup Begin.")
    # Get args from CLI and put them into the global data
    dCliDict = fndCliParse("")
    # Carefully insert any new CLI values into the Global object.
    dCliDictClean = {k:v for k,v in dCliDict.items() if v is not None}
    g.__dict__.update(dCliDictClean)

    # Get data from the extract file: one line of header, one line of data.
    with open(g.sInputFilename,'r') as fhInput:
        oReader = csv.reader(fhInput, delimiter=g.sSeparator)
        lHeader = oReader.next()
        lValues = oReader.next()
        NTRC.tracef(3, "DCLN", "proc lHeader|%s|" % (lHeader))
        NTRC.tracef(3, "DCLN", "proc lValues|%s|" % (lValues))
    dValues = dict(zip(lHeader, lValues))
    NTRC.tracef(3, "DCLN", "proc dValues|%s|" % (dValues))
    
    # Open the SearchDatabase for done and progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(g.sSearchDbMongoName, 
            g.sProgressCollectionName, 
            g.sDoneCollectionName)
    # Construct database query for this invocation.  
    sInstructionId = dValues["mongoid"]
    sLineOut = g.sSeparator.join(lValues)
    NTRC.tracef(0,"DCLN","proc looking for done recd|%s|" 
        % (sInstructionId))

    # If this extract is already stored in the database, don't do it again.  
    bIsItDone = g.mdb.fnbIsItDone(sInstructionId)
    if not bIsItDone:
        # If case not already done, add data line to the giant output file.
        # But first, ...
        # If the output file does not exist, or is empty, write the header line
        #  in first before the data line.  
        # (If the file does not exist, open mode=a will create an empty one.)
        with open(g.sGiantOutputFilename,'a') as fhOutput:
            if not os.stat(g.sGiantOutputFilename).st_size:
                sHeaderLine = g.sSeparator.join(lHeader)
                fhOutput.write(sHeaderLine + "\n")
                NTRC.tracef(3, "DCLN", "proc wroteheaderline|%s|" 
                    % (sHeaderLine))
            fhOutput.write(sLineOut + "\n")
            NTRC.tracef(0, "DCLN", "proc line appended to output \nsLineOut|%s|" 
                % (sLineOut))

        # Probably record the done record in db.
        if g.sDoNotRecord.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Done not recorded.")
        else:
            dResult = g.mdb.fndInsertDoneRecord(sInstructionId, dValues)

        # Probably delete the extract file.
        if g.sDoNotDelete.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Input file not deleted.")
        else:
            os.remove(g.sInputFilename)
            NTRC.tracef(3,"DCLN", "proc fileremoved|%s|" 
                % (g.sInputFilename))
            # And remove its in-progress record from the search db.
            g.mdb.fndDeleteProgressRecord(sInstructionId)
    else:
        # Duplicate instruction; do not add line to output file.
        NTRC.tracef(0, "DCLN", "proc line NOT appended to output file \n"
            "sLineOut|%s|" 
            % (sLineOut))

    NTRC.ntracef(0,"DCLN","datacleanup End.")
    return 0
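# A minimal standalone sketch of the append-with-header-if-empty pattern
#  used above; fnAppendWithHeader is an illustrative name, not one of the
#  original helpers.
import os

def fnAppendWithHeader(mysFilename, mysHeaderLine, mysDataLine):
    # Mode "a" creates the file if it does not exist; write the header
    #  only when the file is new or still empty, then append the data.
    with open(mysFilename, 'a') as fhOut:
        if not os.stat(mysFilename).st_size:
            fhOut.write(mysHeaderLine + "\n")
        fhOut.write(mysDataLine + "\n")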
def main():
    '''
    Process:
    - Parse the CLI command into g.various data items.
    - Validate user-supplied directories; get environment variables.
    - Query the searchspace for the stream of instructions.
    - For each instruction from the database selection, get a dict for the line.
    - Using the dict args, construct plausible command lines, written to a file.
    - Check to see that there aren't too many similar processes 
      already running; if too many, then wait.
    - Launch ListActor process to execute commands.
    - Wait a polite interval before launching another.
    '''
    NTRC.ntracef(0, "MAIN", "Begin.")
    NTRC.ntracef(0, "MAIN", "TRACE  traceproduction|%s|" % NTRC.isProduction())

    sBrokerCommand = fnsReconstituteCommand(sys.argv)
    fnbMaybeLogCommand(sBrokerCommand)
    NTRC.ntracef(0, "MAIN", "command=|%s|" % (sBrokerCommand.rstrip()))

    # Get args from CLI and put them into the global data
    dCliDict = brokercli.fndCliParse("")
    # Carefully insert any new CLI values into the Global object.  
    dCliDictClean = {k:util.fnIntPlease(v) for k,v in dCliDict.items() 
                        if v is not None}
    g.__dict__.update(dCliDictClean)

    # Validate that the user-specified directories exist.
    if not fnbValidateDir(g.sFamilyDir):
        raise ValueError("FamilyDir \"%s\" not found" % (g.sFamilyDir))
    if not fnbValidateDir("%s/%s" % (g.sFamilyDir, g.sSpecificDir)):
        raise ValueError("SpecificDir \"%s\" not found" % (g.sSpecificDir))

    # Get command templates from external file.
    fnGetCommandTemplates(g.sCommandListFilename)

    # Construct database query for this invocation.
    g.cFmt = brokerformat.CFormat()
    dQuery = g.cFmt.fndFormatQuery(dCliDict, g)

    # Look for overriding environment variables
    fnvGetEnvironmentOverrides()

    # Open the database to keep "done" records,
    #  and delete moldy, old in-progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(g.sSearchDbMongoName, 
                g.sSearchDbProgressCollectionName, 
                g.sSearchDbDoneCollectionName)
    g.mdb.fnvDeleteProgressCollection()
    
    # Get the set of instructions for today from database.
    NTRC.tracef(0,"MAIN","proc querydict2|%s|" % ((dQuery)))
    itAllInstructions = searchspace.fndgGetSearchSpace(g.sInsDir, g.sInsTyp, 
                        dQuery)
    nRuns = fnnProcessAllInstructions(itAllInstructions)
    
    # If this wasn't just a listonly run, do all the cases.  
    if not g.sListOnly.startswith("Y"):
        NTRC.ntracef(3, "MAIN", "proc all instr|%s|" % (g.lGiantInstr))
        nCases = nb.fntRunEverything(g, iter(g.lGiantInstr)
                                , g.nCoreTimer, g.nStuckLimit)
    else:
        nCases = len(g.lGiantInstr)
    NTRC.ntracef(0, "MAIN", "End ncases|%s|" % (nCases,))
def main(mysInstructionsFileName,mysLogFileName):
    (lTemplate,g.dVars) = fnldParseInput(mysInstructionsFileName)
    lLines = list()
    with open(mysLogFileName,"r") as fhLogFile:

        '''\
        get list of tuples: lines that match some lineregex, for which var

        foreach line, 
            if matches any lineregex
                extract value, 
                put varname and value in dictionary

        be careful never to form a list of lines of the input log file, 
         or of anything that is big-O of that.  filter first.  
         (A streaming-filter sketch follows this function.)
        '''

        # Form list of all lines that match some var.
        nLineNr = 0
        lLinesSelectedRaw = list()
        for sLine in fhLogFile:
            nLineNr += 1                # Need line nr only for debugging.
            for sVarname in g.dVars.keys():
                tResult = fntDoesLineMatchThisVar(sLine, nLineNr, sVarname)
                # If line matches any var, save the line and the varname.
                if tResult[0]: 
                    lLinesSelectedRaw.append(tResult)
        NTRC.tracef(3,"MN2","proc lLinesSelectedRaw len|%s| all|%s|" % (len(lLinesSelectedRaw),lLinesSelectedRaw))

    # Eliminate duplicates.  Should not be any if the lineregexes are 
    #  specific enough.  
    lLinesSelected = list(set(lLinesSelectedRaw))
    NTRC.tracef(5,"MN3","proc lLinesSelected len|%s| all|%s|" % (len(lLinesSelected),lLinesSelected))

    # Extract variable value from each matching line.
    # List of lines selected is actually a list of triples.
    lResults = map( lambda (omatch, sLine, sVarname): 
                fntMatchValue(sLine, g.dVars[sVarname])
                , lLinesSelected )
    # Returned list of (name,val) tuples for vars in lines selected.
    #  Make a dictionary.  
    dValues = dict(lResults)

    # In case we did not find the line for a variable, dummy up a value.
    for sKey in g.dVars: 
        dValues.setdefault(sKey,"nolinefound")

    # And in case we didn't even find a rule for some variable that
    #  will be used in the template, dummy up a value for it, too.  
    sTemplateHeader = "\n".join(lTemplate).replace("{","").replace("}","").replace("\n"," ")
    lTemplateVars = sTemplateHeader.split()
    for sTemplateVar in lTemplateVars: 
        dValues.setdefault(sTemplateVar,"norulefound")

    # Add the synthetic variables to the value dictionary.
    dSyntho = fndGetSyntheticVars()
    dValues.update(dSyntho)

    # Make the seed value, at least, print constant width for legibility.  
    sSeed = dValues["seed"]
    sSeednew = "%09d" % (int(sSeed))
    dValues["seed"] = sSeednew

    # Fill in the template with values and print.  
    # Template is allowed to be multiple lines.
    sTemplate = "\n".join(lTemplate)
    sLineout = makeCmd(sTemplate,dValues)
    if g.bHeader or os.environ.get("header",None):
        # Header is a single line concatenation of all the substitutions
        #  in the template.
        #  If the template is longer than one line, well, you can't read 
        #  the data with a simple header anyway.  Oops.  
        sHeader = sTemplateHeader
        print sHeader
    # Newline already pasted on the end of template; don't add another.
    print sLineout,
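# A hedged sketch of the "filter first" advice in the docstring above:
#  stream the log file through a generator so memory stays O(matches),
#  never O(logfile).  fngSelectMatchingLines and its argument shape are
#  illustrative assumptions, not the original helpers.
def fngSelectMatchingLines(myfhLogFile, mydVarRegexes):
    # mydVarRegexes: varname -> compiled lineregex (assumed shape).
    for nLineNr, sLine in enumerate(myfhLogFile, start=1):
        for sVarname, oRegex in mydVarRegexes.items():
            if oRegex.search(sLine):
                yield (nLineNr, sLine, sVarname)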
    def mfCalcCurrentSectorLifetime(self, myfNow):
        '''
        if glitch in progress
          if glitch is too old
            turn it off
            log expired
            normal lifetime
          else 
            calc reduced lifetime
            if decay below ignore limit
              turn it off
              log below limit
        '''
        if self.bGlitchActive:
            fTimeDiff = myfNow - self.fGlitchBegin
            fDuration = float(self.nGlitchMaxlife)
            # If the glitch lifetime has expired, turn it off.
            if fTimeDiff > fDuration:
                NTRC.tracef(
                    3, "LIFE", "proc glitch lifetime expired "
                    "id|%s| num|%s| start|%.3f| now|%.3f| maxlife|%s|" %
                    (self.ID, self.nGlitches, self.fGlitchBegin, myfNow,
                     self.nGlitchMaxlife))
                lg.logInfo(
                    "LIFETIME", "expired   t|%6.0f| shelf|%s| "
                    "id|%s| num|%s| start|%.3f| now|%.3f| maxlife|%s|" %
                    (myfNow, self.sShelfID, self.ID, self.nGlitches,
                     self.fGlitchBegin, myfNow, self.nGlitchMaxlife))
                self.bGlitchActive = False
                self.fGlitchTime += fTimeDiff
                self.fCurrentLifetime = self.fOriginalLifetime
            else:
                # The glitch is still current.
                # Carefully calculate the new sector lifetime based on
                #  some reduction due to glitch and the age of the glitch.
                fAgeInHalflives = fTimeDiff / self.nGlitchDecayHalflife
                fExponentialDecay = exp(-self.fLn2 * fAgeInHalflives)
                fReductionFraction = 1.0 * self.nReductionPercentage / 100.0
                self.fCurrentLifetime = (
                    1.0 * self.fOriginalLifetime *
                    (1.0 - fReductionFraction * fExponentialDecay))
                NTRC.tracef(
                    3, "LIFE", "proc calcsectorlife num|%s| "
                    "started|%.3f| age|%.3f| decay|%.3f| reduct|%.3f| "
                    "currlife|%.3f|" %
                    (self.nGlitches, self.fGlitchBegin, fAgeInHalflives,
                     fExponentialDecay, fReductionFraction,
                     self.fCurrentLifetime))
                # If the glitch has diminished to a low level,
                #  turn it off.
                if fExponentialDecay < G.fGlitchIgnoreLimit:
                    self.bGlitchActive = False
                    self.fGlitchTime += fTimeDiff
                    NTRC.tracef(
                        3, "LIFE", "proc glitch turned off lifeid|%s| "
                        "num|%s| started|%.3f| age|%.3f| decay|%.3f|" %
                        (self.ID, self.nGlitches, self.fGlitchBegin,
                         fAgeInHalflives, fExponentialDecay))
        else:
            # No current glitch active.  Lifetime is as usual.
            self.fCurrentLifetime = self.fOriginalLifetime
        return self.fCurrentLifetime
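    # A worked check of the decay arithmetic above (hedged example values):
    #  with fOriginalLifetime = 1000.0, nReductionPercentage = 50, and an
    #  age of exactly one halflife, fExponentialDecay = exp(-ln2) = 0.5 and
    #  fCurrentLifetime = 1000.0 * (1 - 0.5 * 0.5) = 750.0; as more
    #  halflives pass, the decay term vanishes and the lifetime climbs
    #  back toward the original 1000.0.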
    def mAge_sector(self):
        ''' A sector in the shelf fails.  This corrupts a document.
            For the moment, assume that it destroys the document.  
            Eventually, it will have a probability of destroying the 
            document depending on the portion of the document 
            corrupted and the sensitivity of the document to corruption
            (e.g., compressed or encrypted), or the failure hits an
            encryption or license key.  
        '''
        # If the shelf has been emptied by a shelf failure, stop
        # caring about sector failures.
        while self.bAlive:
            # Sector lifetime depends on shelf lifetime and glitch age.
            fNow = G.env.now
            cLifetime = G.dID2Lifetime[self.sSectorLifetimeID]
            fLifetimeNow = cLifetime.mfCalcCurrentSectorLifetime(fNow)
            fSectorLifeInterval = util.makeexpo(fLifetimeNow)
            NTRC.tracef(
                3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
                "next interval|%.3f|hr from life rate|%.3f|hr" %
                (G.env.now, self.ID, fSectorLifeInterval, fLifetimeNow))
            yield G.env.timeout(fSectorLifeInterval)

            # S E C T O R  E R R O R
            self.nSectorHits += 1
            G.nTimeLastEvent = G.env.now
            NTRC.tracef(
                3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
                "Sector_error hits|%d| emptyhits|%d|" %
                (G.env.now, self.ID, self.nSectorHits, self.nEmptySectorHits))

            # Select a victim Document, probability proportional to size.
            # Small error, size=1.  What doc dies as a result?
            sCopyVictimID = self.mSelectVictimCopy(mynErrorSize=1)

            # New version: compress strings of consecutive misses into single line.
            # Normally we log one line per error regardless of whether it hits or
            # misses a document.  That results in hideously long log files for
            # sparse storage structures, like small docs on large shelf.
            # Count consecutive misses, and issue one summary line before the
            # next hit.
            # CANDIDATE FOR REFACTORING (see the sketch after this function).
            if sCopyVictimID:  # Hidden error in victim doc.
                # Destroy copy on this shelf.
                cCopy = G.dID2Copy[sCopyVictimID]
                sDocID = cCopy.mGetDocID()
                self.mDestroyCopy(sCopyVictimID)
                # Log the summary line if we just ended a string of misses
                if self.nConsecutiveMisses > 0:
                    lg.logInfo(
                        "SERVER", "small error t|%6.0f| svr|%s| "
                        "shelf|%s| consecutive misses|%d|" %
                        (G.env.now, self.sServerID, self.ID,
                         self.nConsecutiveMisses))
                self.nConsecutiveMisses = 0
                lg.logInfo(
                    "SERVER", "small error t|%6.0f| svr|%s| "
                    "shelf|%s| hidden failure in copy|%s| doc|%s|" %
                    (G.env.now, self.sServerID, self.ID, sCopyVictimID,
                     sDocID))
                NTRC.tracef(
                    3, "FAIL", "proc t|%d| sector failure server|%s| "
                    "qual|%d| shelf|%s| doc|%s| copy|%s|" %
                    (G.env.now, self.sServerID,
                     G.dID2Server[self.sServerID].nQual, self.ID, sDocID,
                     sCopyVictimID))
            else:  # No victim, hit empty space.
                self.nEmptySectorHits += 1
                NTRC.tracef(
                    3, "SHLF", "proc mAge_sector shelf|%s| "
                    "sector error fell in empty space" % (self.ID))
                if self.nConsecutiveMisses == 0:
                    lg.logInfo(
                        "SERVER", "small error t|%6.0f| svr|%s| "
                        "shelf|%s| hidden failure in copy|%s|" %
                        (G.env.now, self.sServerID, self.ID, sCopyVictimID))
                self.nConsecutiveMisses += 1
                NTRC.tracef(
                    3, "FAIL", "proc t|%d| sector failure server|%s| "
                    "qual|%d| shelf|%s| copy|%s|" %
                    (G.env.now, self.sServerID,
                     G.dID2Server[self.sServerID].nQual, self.ID,
                     sCopyVictimID))
            # Initiate a repair of the dead document.
            # BZZZT NYI: currently all such failures are silent, so they are
            #  not detected by the client until audited (or end of run).
        # Shelf is no longer alive, so we do not notice or schedule
        #  future sector errors.  Log the event.
        lg.logInfo(
            "SHELF ", "t|%6.0f| dead shelf|%s| of svr|%s|, "
            "no future errors" % (G.env.now, self.ID, self.sServerID))
    def mSelectVictimCopy(self, mynErrorSize):
        ''' Which doc copy on this shelf, if any, was hit by this error?
            Throw a uniform dart at all the docs on the shelf, see 
            which one gets hit, or dart falls into empty space.  Doc size counts.  
        '''
        nRandomSpot = util.makeunif(1, self.nCapacity + mynErrorSize - 1)
        nLoc = 0
        NTRC.tracef(
            5, "SHLF", "proc SelectVictimCopy0 wherehit spot|%s| "
            "hiwater|%s|  shelfid|%s| capacity|%s|" %
            (nRandomSpot, self.nHiWater, self.ID, self.nCapacity))
        # First, check to see if the failure is maybe in an occupied region.
        if nRandomSpot <= self.nHiWater:
            # Find the document hit by the error.  May have been hit before, too.
            # New version, vanilla binary search with adjacent interval checking
            #  on list of all locations assigned on this shelf.
            # After you find the location, check to see that it
            #  is still occupied by live copy.
            nLen = len(self.lCopyIDsComplete)
            nDist = (nLen + 1) // 2     # Floor division in Py2 and Py3.
            nLoc = nDist
            NTRC.tracef(
                5, "SHLF", "proc SelectVictimCopy0 searchsetup len|%s| "
                "loc|%s| dist|%s|" % (nLen, nLoc, nDist))
            while 1:
                if nLoc <= 0: nLoc = 1
                if nLoc >= nLen: nLoc = nLen - 1
                nDist = (nDist + 1) // 2
                if nDist == 0: nDist = 1

                nTop = self.lCopyTops[nLoc]
                nBottom = self.lCopyTops[nLoc - 1]
                sCopyID = self.lCopyIDsComplete[nLoc - 1]
                sDocID = self.lDocIDsComplete[nLoc - 1]
                cCopy = G.dID2Copy[sCopyID]

                if nRandomSpot <= nTop:
                    # Lower than top, look down.
                    if nRandomSpot >= nBottom:
                        # Found to left of nLoc.
                        NTRC.tracef(
                            5, "SHLF", "proc SelectVictimCopy5D "
                            "found victim id|%s| at spot|%s| in[%s,%s]| "
                            "doc|%s|" %
                            (sCopyID, nRandomSpot, nBottom, nTop, sDocID))
                        # Is this slot still occupied by a live copy?
                        if sCopyID in self.lCopyIDs:
                            sVictimID = sCopyID
                            NTRC.tracef(
                                3, "SHLF", "proc mSelectVictimCopy "
                                "NEWD end shelf|%s| spot|%d| hits doc|%s| "
                                "placed[%d,%d] size|%d| outof|%d|" %
                                (self.ID, nRandomSpot, sVictimID,
                                 cCopy.nBlkBegin, cCopy.nBlkEnd,
                                 (cCopy.nBlkEnd - cCopy.nBlkBegin + 1),
                                 self.nCapacity))
                        else:
                            sVictimID = None
                            NTRC.tracef(
                                5, "SHLF", "proc SelectVictimCopy2D "
                                "no longer valid copyid|%s| docid|%s|" %
                                (sCopyID, sDocID))
                            self.nMultipleHits += 1
                        break
                    else:
                        nLoc -= nDist
                        NTRC.tracef(
                            5, "SHLF", "proc SelectVictimCopy3D "
                            "down spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|"
                            % (nRandomSpot, nBottom, nTop, nLoc, nDist))
                else:
                    # Higher than top, look up.
                    if nRandomSpot <= self.lCopyTops[nLoc + 1]:
                        # Found to right of nLoc.
                        # Reevaluate ids and locations to the next slot
                        #  on the right.
                        sCopyID = self.lCopyIDsComplete[nLoc + 1 - 1]
                        sDocID = self.lDocIDsComplete[nLoc + 1 - 1]
                        cCopy = G.dID2Copy[sCopyID]
                        nBottom = self.lCopyTops[nLoc + 1 - 1]
                        NTRC.tracef(
                            5, "SHLF", "proc SelectVictimCopy5U "
                            "found victim id|%s| at spot|%s| in[%s,%s]| doc|%s|"
                            % (sCopyID, nRandomSpot, nBottom, nTop, sDocID))
                        # Is this slot still occupied by a live copy?
                        if sCopyID in self.lCopyIDs:
                            sVictimID = sCopyID
                            NTRC.tracef(
                                3, "SHLF", "proc mSelectVictimCopy NEWU "
                                "end shelf|%s| spot|%d| hits doc|%s| "
                                "placed[%d,%d] size|%d| outof|%d|" %
                                (self.ID, nRandomSpot, sVictimID,
                                 cCopy.nBlkBegin, cCopy.nBlkEnd,
                                 (cCopy.nBlkEnd - cCopy.nBlkBegin + 1),
                                 self.nCapacity))
                        else:
                            sVictimID = None
                            NTRC.tracef(
                                5, "SHLF", "proc SelectVictimCopy2U "
                                "no longer valid copyid|%s| docid|%s|" %
                                (sCopyID, sDocID))
                            self.nMultipleHits += 1
                        break
                    else:
                        nLoc += nDist
                        NTRC.tracef(
                            5, "SHLF", "proc SelectVictimCopy3U up   "
                            "spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|" %
                            (nRandomSpot, nBottom, nTop, nLoc, nDist))

        else:  # Outside hiwater area, just count as a miss.
            NTRC.tracef(
                3, "SHLF", "proc mSelectVictimCopy shelf|%s| spot|%d| "
                "above hiwater|%s| empty" %
                (self.ID, nRandomSpot, self.nHiWater))
            sVictimID = None
            self.nHitsAboveHiWater += 1
        return sVictimID
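# A hedged alternative to the hand-rolled search above, using the standard
#  bisect module; assumes lCopyTops is sorted ascending with the bottom of
#  the first region at index 0.  fnFindVictimSlot is illustrative only.
import bisect

def fnFindVictimSlot(mylCopyTops, mynSpot):
    # Return k with mylCopyTops[k-1] <= mynSpot <= mylCopyTops[k], i.e. the
    #  slot whose block interval contains the dart (the victim copy is then
    #  lCopyIDsComplete[k-1]); None if the spot falls outside all regions.
    k = bisect.bisect_left(mylCopyTops, mynSpot)
    if 0 < k < len(mylCopyTops):
        return k
    return None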
def main(mysInstructionsFileName, mysLogFileName):
    (lTemplate, g.dVars) = fnldParseInput(mysInstructionsFileName)
    lLines = list()
    with open(mysLogFileName, "r", encoding="utf-8") as fhLogFile:
        '''\
        get list of tuples: lines that match some lineregex, for which var

        foreach line, 
            if matches any lineregex
                extract value, 
                put varname and value in dictionary

        be careful never to form a list of lines of the input log file, 
         or of anything that is big-O of that.  filter first.
        '''

        # Form list of all lines that match some var.
        nLineNr = 0
        lLinesSelectedRaw = list()
        for sLine in fhLogFile:
            nLineNr += 1  # Need line nr only for debugging.
            for sVarname in g.dVars.keys():
                tResult = fntDoesLineMatchThisVar(sLine, nLineNr, sVarname)
                # If line matches any var, save the line and the varname.
                if tResult[0]:
                    lLinesSelectedRaw.append(tResult)
        NTRC.tracef(
            3, "MN2", "proc lLinesSelectedRaw len|%s| all|%s|" %
            (len(lLinesSelectedRaw), lLinesSelectedRaw))

    # Eliminate duplicates.  Should not be any if the lineregexes are
    #  specific enough.
    lLinesSelected = list(set(lLinesSelectedRaw))
    NTRC.tracef(
        5, "MN3", "proc lLinesSelected len|%s| all|%s|" %
        (len(lLinesSelected), lLinesSelected))

    # Extract variable value from each matching line.
    # List of lines selected is actually a list of triples.
    #    lResults = map( lambda ((omatch, sLine, sVarname)):
    #                fntMatchValue(sLine, g.dVars[sVarname])
    #                , lLinesSelected )
    # AAARGH: PythonV3 removed tuples as args for lambdas!!!
    lResults = map(lambda tLine: fntMatchValue(tLine[1], g.dVars[tLine[2]]),
                   lLinesSelected)
    # Returned list of (name,val) tuples for vars in lines selected.
    #  Make a dictionary.
    dValues = dict(lResults)

    # In case we did not find the line for a variable, dummy up a value.
    for sKey in g.dVars:
        dValues.setdefault(sKey, "nolinefound")

    # And in case we didn't even find a rule for some variable that
    #  will be used in the template, dummy up a value for it, too.
    sTemplateHeader = "\n".join(lTemplate).replace("{", "").replace(
        "}", "").replace("\n", " ")
    lTemplateVars = sTemplateHeader.split()
    for sTemplateVar in lTemplateVars:
        dValues.setdefault(sTemplateVar, "norulefound")

    # Add the synthetic variables to the value dictionary.
    dSyntho = fndGetSyntheticVars()
    dValues.update(dSyntho)

    # Make the seed value, at least, print constant width for legibility.
    sSeed = dValues["seed"]
    sSeednew = "%09d" % (int(sSeed))
    dValues["seed"] = sSeednew

    # Fill in the template with values and print.
    # Template is allowed to be multiple lines.
    sTemplate = "\n".join(lTemplate)
    sLineout = makeCmd(sTemplate, dValues)
    if g.bHeader or os.environ.get("header", None):
        # Header is a single line concatenation of all the substitutions
        #  in the template.
        #  If the template is longer than one line, well, you can't read
        #  the data with a simple header anyway.  Oops.
        sHeader = sTemplateHeader
        print(sHeader)
    # Newline already pasted on the end of template; don't add another.
    print(sLineout, end="")
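# A hedged guess at makeCmd's contract, shown for illustration only (the
#  real makeCmd is defined elsewhere in this codebase): fill {name}
#  placeholders in the template from the value dictionary.
def makeCmdSketch(mysTemplate, mydValues):
    # str.format consumes {name} markers; the "norulefound" defaults above
    #  exist precisely so this lookup never raises KeyError.
    return mysTemplate.format(**mydValues)
# E.g., makeCmdSketch("run --seed {seed}", {"seed": "000000042"})
#  -> "run --seed 000000042".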
def main():
    '''
    Process:
    Open the file given on the command line.
    Open the database given on the command line.
    Read the two lines from the file.
    If the sDoneId(=mongoid) already appears in the done collection 
     of the database, 
    Then    end.
    Else    dictionary-ify the data (maybe csvreader already did that for us).
            add the dict to the done collection, including the sDoneId field.
            end.
    '''
    NTRC.ntracef(0,"DCLN","datacleanup Begin.")
    # Get args from CLI and put them into the global data
    dCliDict = fndCliParse("")
    # Carefully insert any new CLI values into the Global object.
    dCliDictClean = {k:v for k,v in dCliDict.items() if v is not None}
    g.__dict__.update(dCliDictClean)

    # Get data from the extract file: one line of header, one line of data.
    with open(g.sInputFilename,'r') as fhInput:
        oReader = csv.reader(fhInput, delimiter=g.sSeparator)
        lHeader = next(oReader)
        lValues = next(oReader)
        NTRC.tracef(3, "DCLN", "proc lHeader|%s|" % (lHeader))
        NTRC.tracef(3, "DCLN", "proc lValues|%s|" % (lValues))
    dValues = dict(zip(lHeader, lValues))
    NTRC.tracef(3, "DCLN", "proc dValues|%s|" % (dValues))
    
    # Open the SearchDatabase for done and progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(g.sSearchDbMongoName, 
            g.sProgressCollectionName, 
            g.sDoneCollectionName)
    # Construct database query for this invocation.  
    sInstructionId = dValues["mongoid"]
    sLineOut = g.sSeparator.join(lValues)
    NTRC.tracef(0,"DCLN","proc looking for done recd|%s|" 
                % (sInstructionId))

    # If this extract is already stored in the database, don't do it again.  
    bIsItDone = g.mdb.fnbIsItDone(sInstructionId)
    if not bIsItDone:
        # If case not already done, add data line to the giant output file.
        # But first, ...
        # If the output file does not exist, or is empty, write the header line
        #  in first before the data line.  
        # (If the file does not exist, open mode=a will create an empty one.)
        with open(g.sGiantOutputFilename,'a') as fhOutput:
            if not os.stat(g.sGiantOutputFilename).st_size:
                sHeaderLine = g.sSeparator.join(lHeader)
                fhOutput.write(sHeaderLine + "\n")
                NTRC.tracef(3, "DCLN", "proc wroteheaderline|%s|" 
                    % (sHeaderLine))
            fhOutput.write(sLineOut + "\n")
            NTRC.tracef(0, "DCLN", "proc line appended to output \nsLineOut|%s|" 
                        % (sLineOut))

        # Probably record the done record in db.
        if g.sDoNotRecord.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Done not recorded.")
        else:
            dResult = g.mdb.fndInsertDoneRecord(sInstructionId, dValues)

        # Probably delete the extract file.
        if g.sDoNotDelete.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Input file not deleted.")
        else:
            os.remove(g.sInputFilename)
            NTRC.tracef(3,"DCLN", "proc fileremoved|%s|" 
                        % (g.sInputFilename))
            # And remove its in-progress record from the search db.
            g.mdb.fndDeleteProgressRecord(sInstructionId)
    else:
        # Duplicate instruction; do not add line to output file.
        NTRC.tracef(0, "DCLN", "proc line *NOT* appended to output file \n"
                    "sLineOut|%s|" 
                    % (sLineOut))

    NTRC.ntracef(0,"DCLN","datacleanup End.")
    return 0