def mDestroyCopy(self,mysCopyID):
    '''Remove one copy from this shelf and inform the server.

    Returns a "shelfID-docID-copyID" string on success, or False
    if the copy is not currently on this shelf (BUGCHECK trace).
    Side effects: shrinks lCopyIDs/lDocIDs, clears bContig, and
    deliberately does NOT return the copy's space to nFreeSpace.
    '''
    try:
        nCopyIndex = self.lCopyIDs.index(mysCopyID)
    except ValueError:
        NTRC.tracef(0, "SHLF", "BUGCHECK copyID not found for removal|%s|" % (mysCopyID))
        return False
    # Remove doc and copy from current lists.
    # The two lists are parallel, so one index removes from both.
    del self.lCopyIDs[nCopyIndex]
    del self.lDocIDs[nCopyIndex]
    # Tell the server that the copy is gone.
    cCopy = G.dID2Copy[mysCopyID]
    sDocID = cCopy.sDocID
    self.cServer.mDestroyCopy(mysCopyID, sDocID, self.ID)
    # And give back the space it occupied.
    self.bContig = False
    cDoc = G.dID2Document[sDocID]
    # BZZZT: DO NOT put this region back into use.  It has already
    #  suffered an error once and caused a document to fail.
    #self.nFreeSpace += cDoc.nSize
    NTRC.tracef(3, "SHLF", "proc mDestroyCopy remove doc|%s| copy|%s| "
        "idx|%d| size|%d| from shelf|%s| remainingdocs|%d| free|%d|" %
        (cCopy.sDocID, mysCopyID, nCopyIndex, cDoc.nSize, self.ID,
        len(self.lCopyIDs), self.nFreeSpace))
    # And, at long last, destroy the Copy object itself.
    del cCopy
    return self.ID + "-" + sDocID + "-" + mysCopyID
def fntMatchValue(mysLine, mydVar):
    '''Pull a single value out of a log line for one variable spec.

    mydVar supplies "wordnumber" (1-based word index, 0 = whole line),
    "valueregex" (regex with one capture group), and "varname".
    Returns (varname, value); value is "novaluefound" when the regex
    does not match, or a placeholder when the word index is out of range.
    '''
    nWord = int(mydVar["wordnumber"])
    lTokens = mysLine.split()
    # Choose the target text: whole line, the selected word, or a
    # conspicuous placeholder when the index runs past the line.
    if nWord == 0:
        sTarget = mysLine
    elif nWord <= len(lTokens):
        sTarget = lTokens[nWord - 1]
    else:
        sTarget = "nowordhere_indexoutofrange"
    sRegex = mydVar["valueregex"]
    sName = mydVar["varname"]
    oHit = re.search(sRegex, sTarget)
    NTRC.tracef(
        5, "MCHV", "proc MatchValue matching word var|%s| word|%s| valueregex|%s| matchobj|%s|"
        % (sName, sTarget, sRegex, oHit))
    if oHit:
        # Word matches the valueregex.  Save the value.
        sResult = oHit.group(1)
        NTRC.tracef(3, "MCHV", "proc addvalue name|%s| val|%s|" %
                    (sName, sResult))
    else:
        # If not found, at least supply something conspicuous for printing.
        sResult = "novaluefound"
    return (sName, sResult)
def mDestroyCopy(self, mysCopyID):
    '''Remove a copy from this shelf and notify the owning server.

    Returns "shelfID-docID-copyID" on success, False if the copy
    is not found on this shelf.  The freed region is intentionally
    never reused (see BZZZT comment).
    '''
    try:
        nCopyIndex = self.lCopyIDs.index(mysCopyID)
    except ValueError:
        NTRC.tracef(
            0, "SHLF", "BUGCHECK copyID not found for removal|%s|" %
            (mysCopyID))
        return False
    # Remove doc and copy from current lists.
    # lCopyIDs and lDocIDs are parallel lists; same index in both.
    del self.lCopyIDs[nCopyIndex]
    del self.lDocIDs[nCopyIndex]
    # Tell the server that the copy is gone.
    cCopy = G.dID2Copy[mysCopyID]
    sDocID = cCopy.sDocID
    self.cServer.mDestroyCopy(mysCopyID, sDocID, self.ID)
    # And give back the space it occupied.
    self.bContig = False
    cDoc = G.dID2Document[sDocID]
    # BZZZT: DO NOT put this region back into use.  It has already
    #  suffered an error once and caused a document to fail.
    #self.nFreeSpace += cDoc.nSize
    NTRC.tracef(
        3, "SHLF", "proc mDestroyCopy remove doc|%s| copy|%s| "
        "idx|%d| size|%d| from shelf|%s| remainingdocs|%d| free|%d|" %
        (cCopy.sDocID, mysCopyID, nCopyIndex, cDoc.nSize, self.ID,
         len(self.lCopyIDs), self.nFreeSpace))
    # And, at long last, destroy the Copy object itself.
    del cCopy
    return self.ID + "-" + sDocID + "-" + mysCopyID
def fntMatchValue(mysLine,mydVar):
    '''\
    Extract value from line according to valueregex for var.
    If no value found, supply suitably disappointing string.
    Get the right word from the line.
    If asked for word zero, use the whole line.
    Makes the extraction harder, but sometimes necessary.

    Returns (varname, value) tuple; value is "novaluefound" when
    the valueregex does not match the selected word.
    '''
    # wordnumber arrives as a string from the csv; 1-based index.
    sWordnumber = mydVar["wordnumber"]
    nWordnumber = int(sWordnumber)
    lWords = mysLine.split()
    if nWordnumber == 0:
        sWord = mysLine
    elif nWordnumber <= len(lWords):
        sWord = lWords[nWordnumber-1]
    else:
        # Index beyond the end of the line: conspicuous placeholder.
        sWord = "nowordhere_indexoutofrange"
    sValueregex = mydVar["valueregex"]
    sVarname = mydVar["varname"]
    oMatch = re.search(sValueregex,sWord)
    NTRC.tracef(5,"MCHV","proc MatchValue matching word var|%s| word|%s| valueregex|%s| matchobj|%s|" % (sVarname,sWord,sValueregex,oMatch))
    if oMatch:
        # Word matches the valueregex.  Save the value.
        sValue = oMatch.group(1)
        NTRC.tracef(3,"MCHV","proc addvalue name|%s| val|%s|" % (sVarname,sValue))
    else:
        # If not found, at least supply something conspicuous for printing.
        sValue = "novaluefound"
    return (sVarname,sValue)
def fndFormatQuery(self, mydCli, myg):
    '''
    Take all the CLI options that might specify a searchable attribute,
    and construct a MongoDB or searchspace query dictionary.  This is
    lots nastier than it first appears to be because json is so bloody
    picky.

    mydCli: dict of CLI option name -> value string (or None).
    myg:    globals-like object supplying lYesNoOptions, lMandatoryArgs,
            and lSearchables.
    Returns a dict limited to searchable, non-None attributes; a
    user-supplied "sQuery" dict, if present, is merged in last.
    '''
    dOut = dict()
    for sAttrib, sValue in mydCli.items():
        result = None
        if sValue is not None:
            # Is it something valid in json?
            try:
                result = json.loads(sValue)
            except ValueError:
                # Is it a string that should be an integer, ok in json?
                try:
                    result = int(sValue)
                except (ValueError, TypeError):
                    # BUGFIX: was a bare "except:", which would also
                    # swallow KeyboardInterrupt/SystemExit and mask
                    # real bugs.  int() can only raise ValueError or
                    # TypeError here.
                    # Is it a naked string for some string-valued var
                    # that isn't just Y/N or a mandatory string?
                    # Rule out dict values that are already formatted.
                    if (isinstance(sValue, str)
                        and sAttrib not in myg.lYesNoOptions
                        and sAttrib not in myg.lMandatoryArgs
                        and '{' not in sValue
                        and '}' not in sValue
                        and ':' not in sValue
                        and ',' not in sValue
                        ):
                        result = '{"$eq":' + '"'+sValue+'"' + '}'
                    else:
                        result = sValue
                    NTRC.tracef(3, "FMT", "proc FormatQuery notjson item "
                        "key|%s| val|%s| result|%s|"
                        % (sAttrib, sValue, result))
        NTRC.tracef(3, "FMT", "proc FormatQuery item key|%s| val|%s| result|%s|"
            % (sAttrib, sValue, result))
        # Can't process dicts thru json twice.
        if isinstance(result, dict):
            dOut[sAttrib] = sValue
        else:
            dOut[sAttrib] = result
    # Allow only attribs that appear in the database, else will get
    #  no results due to implied AND of all items in query dict.
    dOutSafe = {k:v for k,v in dOut.items() if k in myg.lSearchables}
    dOutNotNone = {k:v for k,v in dOutSafe.items() if v is not None}
    NTRC.ntracef(3,"FMT","proc dict b4|%s| \nsafe|%s|\nclean|%s|"
        % (dOut,dOutSafe,dOutNotNone))
    if "sQuery" in dOutNotNone.keys():
        # If the brave user has supplied a full, standalone query string,
        #  add its contents to the query dict so far.
        dTmp = dOutNotNone["sQuery"]
        del dOutNotNone["sQuery"]
        dOutNotNone.update(dTmp)
    return dOutNotNone
def fndFormatQuery(self, mydCli, myg):
    '''
    Take all the CLI options that might specify a searchable attribute,
    and construct a MongoDB or searchspace query dictionary.  This is
    lots nastier than it first appears to be because json is so bloody
    picky.

    mydCli: dict of CLI option name -> value string (or None).
    myg:    globals-like object supplying lYesNoOptions, lMandatoryArgs,
            and lSearchables.
    Returns a dict limited to searchable, non-None attributes; a
    user-supplied "sQuery" dict, if present, is merged in last.
    '''
    dOut = dict()
    for sAttrib, sValue in mydCli.items():
        result = None
        if sValue is not None:
            # Is it something valid in json?
            try:
                result = json.loads(sValue)
            except ValueError:
                # Is it a string that should be an integer, ok in json?
                try:
                    result = int(sValue)
                except (ValueError, TypeError):
                    # BUGFIX: was a bare "except:", which would also
                    # swallow KeyboardInterrupt/SystemExit.  int() can
                    # only raise ValueError or TypeError here.
                    # Is it a naked string for some string-valued var
                    # that isn't just Y/N or a mandatory string?
                    # Rule out dict values that are already formatted.
                    if (isinstance(sValue, str)
                            and sAttrib not in myg.lYesNoOptions
                            and sAttrib not in myg.lMandatoryArgs
                            and '{' not in sValue and '}' not in sValue
                            and ':' not in sValue and ',' not in sValue):
                        result = '{"$eq":' + '"' + sValue + '"' + '}'
                    else:
                        result = sValue
                    NTRC.tracef(
                        3, "FMT", "proc FormatQuery notjson item "
                        "key|%s| val|%s| result|%s|" %
                        (sAttrib, sValue, result))
        NTRC.tracef(
            3, "FMT", "proc FormatQuery item key|%s| val|%s| result|%s|" %
            (sAttrib, sValue, result))
        # Can't process dicts thru json twice.
        if isinstance(result, dict):
            dOut[sAttrib] = sValue
        else:
            dOut[sAttrib] = result
    # Allow only attribs that appear in the database, else will get
    #  no results due to implied AND of all items in query dict.
    dOutSafe = {k: v for k, v in dOut.items() if k in myg.lSearchables}
    dOutNotNone = {k: v for k, v in dOutSafe.items() if v is not None}
    NTRC.ntracef(
        3, "FMT", "proc dict b4|%s| \nsafe|%s|\nclean|%s|" %
        (dOut, dOutSafe, dOutNotNone))
    if "sQuery" in dOutNotNone.keys():
        # If the brave user has supplied a full, standalone query string,
        #  add its contents to the query dict so far.
        dTmp = dOutNotNone["sQuery"]
        del dOutNotNone["sQuery"]
        dOutNotNone.update(dTmp)
    return dOutNotNone
def mScheduleGlitch(self):
    '''Wait for a glitch lifetime on this shelf.
    If the shelf died as a result of the glitch, stop rescheduling.

    SimPy generator process: loops scheduling exponential glitch
    intervals while the shelf is alive; once dead (or lifetime is
    not positive) it parks forever on an infinite timeout.
    '''
    fNow = G.env.now
    NTRC.tracef(
        3, "LIFE", "proc schedule glitch t|%d| shelf|%s| alive|%s|" %
        (fNow, self.sShelfID, self.cShelf.mbIsShelfAlive()))
    while 1:
        fNow = G.env.now
        bAlive = self.cShelf.mbIsShelfAlive()
        if bAlive:
            self.fShelfLife = self.mfCalcCurrentGlitchLifetime(fNow)
            if self.fShelfLife > 0 and bAlive:
                # Draw the next glitch arrival from an exponential
                # distribution based on the current lifetime.
                self.fShelfInterval = util.makeexpo(self.fShelfLife)
                lg.logInfo(
                    "LIFETIME", "schedule  t|%6.0f| for shelf|%s| "
                    "interval|%.3f| freq|%d| life|%.3f|" %
                    (fNow, self.sShelfID, self.fShelfInterval,
                     self.nGlitchFreq, self.fShelfLife))
                NTRC.tracef(
                    3, "LIFE", "proc schedule glitch shelf|%s| "
                    "interval|%.3f| based on life|%.3f| alive|%s| "
                    "waiting..." %
                    (self.sShelfID, self.fShelfInterval,
                     self.fShelfLife, bAlive))
                yield G.env.timeout(self.fShelfInterval)
                # ****** Glitch has now occurred. ******
                # If correlated failure, step entirely outside the
                #  Lifetime-Shelf-Server context to signal several servers.
                if self.nGlitchSpan > 1:
                    # Local import avoids a circular dependency with server.
                    from server import CServer
                    CServer.fnCorrFailHappensToAll(self.nGlitchSpan)
                else:
                    self.mGlitchHappensNow()
            else:
                NTRC.ntracef(
                    3, "LIFE", "proc glitch no freq or not alive, "
                    "set wait to infinity shelf|%s| freq|%d| life|%.3f| "
                    "interval|%.3f|" %
                    (self.sShelfID, self.nGlitchFreq,
                     self.fShelfLife, self.fShelfInterval))
                yield G.env.timeout(G.fInfinity)
        else:
            break   # Because we have to use fako "while 1".
    # When shelf is not alive anymore, wait forever
    NTRC.ntracef(
        3, "LIFE", "proc glitch shelf no longer alive, set wait "
        "to infinity shelf|%s| freq|%d| life|%.3f| interval|%.3f|" %
        (self.sShelfID, self.nGlitchFreq, self.fShelfLife,
         self.fShelfInterval))
    yield G.env.timeout(G.fInfinity)
def fntDoesLineMatchThisVar(mysLine, mynLineNr, mysVarname):
    '''\
    Check line against lineregex of var.
    Return tuple (matchobject, line, varname).

    Returns the tuple only when the lineregex matches; otherwise
    falls off the end and returns None implicitly.
    '''
    # Look up the variable's line-selection regex in the global table.
    dVar = g.dVars[mysVarname]
    sLineregex = dVar["lineregex"]
    oMatch = re.search(sLineregex,mysLine)
    NTRC.tracef(5,"MTLN","proc MatchLine try regex|%s| var|%s| nr|%s| line|%s| match|%s|" % (sLineregex,mysVarname,mynLineNr,mysLine,oMatch))
    if oMatch:
        NTRC.tracef(3,"LINE","proc MatchLine found line|%s|=|%s| var|%s| regex|%s|" % (mynLineNr,mysLine,mysVarname,sLineregex))
        return (oMatch, mysLine, mysVarname)
def fnldParseInput(mysFilename):
    '''
    Return tuple containing
    - the output template string,
    - a list, one item per line, of dicts of column args from the csv
      that contain instructions for getting variable values from lines.
    Beware duck-type integers that become strings.

    Format of csv lines:
        varname,regex to find line,split word number,regex to strip out value

    instruction file format:
        ##becomes comment in output
        ###send out this string as header for the output, no hashes
        =outputformat
        format string
        =variables
        varname,lineregex,wordnumber,valueregex (header)
        (lines of csv data)
    '''
    dParams = dict()
    # BUGFIX: open in text mode.  "rb" yields bytes on Python 3, which
    # breaks the str regexes and csv.DictReader below.
    with open(mysFilename, "r") as fhInfile:
        # Remove comments.
        # BUGFIX: materialize the filtered lines.  On Python 3 filter()
        # returns a one-shot iterator; the second fnlLinesInRange() scan
        # below would otherwise see an exhausted stream.
        lLines = list(filter(
            lambda sLine:
                not re.match("^ *#[^#]", sLine)
                and not re.match("^ *$", sLine.rstrip()),
            fhInfile))
        # Get the output template.  It may be longer than one line.
        lTemplate = fnlLinesInRange(lLines, "^=template", "^=variables")
        # map() is also lazy on Python 3; materialize before tracing/reuse.
        lTemplate = list(map(
            lambda sLine: sLine.rstrip().replace("###", "").replace("##", "#"),
            lTemplate))
        NTRC.tracef(3, "INPT", "proc ParseInput template|%s|" % (lTemplate))
        # Fix the separator in the template according to the user spec.
        lAllTemplateNames = [lTemplateLine.split()
            for lTemplateLine in lTemplate]
        lNewTemplate = [g.sSeparator.join(lTemplateNamesOneLine)
            for lTemplateNamesOneLine in lAllTemplateNames]
        # Now get the CSV args into a dictionary of dictionaries.
        lVarLines = fnlLinesInRange(lLines, "^=variables",
            "^=thiswillnotbefound")
        lRowDicts = csv.DictReader(lVarLines)
        NTRC.tracef(5, "INPT", "proc ParseInput lRowDicts all|%s|" %
            (lRowDicts))
        # Key each csv row dict by its varname.
        dParams = dict(map(
            lambda dRowDict: (dRowDict["varname"], dRowDict),
            lRowDicts))
    return (lNewTemplate, dParams)
def mDestroyCopy(self, mysCopyID, mysDocID, mysShelfID):
    '''
    Oops, a doc died, maybe just one or maybe the whole shelf.

    Server-side bookkeeping for a destroyed copy: tell the owning
    client, then forget the doc locally.  Returns "serverID-docID".
    '''
    NTRC.tracef(
        3, "SERV", "proc mDestroyCopy remove copy|%s| doc|%s| "
        "from shelf|%s|" % (mysCopyID, mysDocID, mysShelfID))
    # Inform the client that the copy is gonzo.
    # dDocIDs maps docID -> owning clientID.
    cClient = G.dID2Client[self.dDocIDs[mysDocID]]
    cClient.mDestroyCopy(mysDocID, self.ID, mysCopyID)
    # Clear out local traces of the doc and copy.
    self.lDocIDs.remove(mysDocID)
    del self.dDocIDs[mysDocID]
    # The Shelf will nuke the copy, because it created it.
    return self.ID + "-" + mysDocID
def mInjectError(self, mynReduction, mynDecayHalflife, mynGlitchMaxlife):
    '''\
    When a glitch occurs, decrease lifetime by some amount, percentage.
    The decrease decays exponentially at some rate until negligible.

    Stores the reduction percentage, decay half-life, decay rate, and
    maximum glitch lifetime on the instance.  Returns the decay rate.
    '''
    # Convert halflife to a rate constant: rate = ln(2) / halflife.
    fHalflife = float(mynDecayHalflife)
    self.nReductionPercentage = mynReduction
    self.fDecayHalflife = fHalflife
    self.fDecayRate = self.fLn2 / fHalflife
    self.fMaxlife = float(mynGlitchMaxlife)
    NTRC.tracef(
        3, "LIFE", "proc inject reduct|%s| decayhalflife|%s| "
        "decayrate|%s| maxlife|%s|" %
        (self.nReductionPercentage, self.fDecayHalflife,
         self.fDecayRate, self.fMaxlife))
    return self.fDecayRate
def fntDoesLineMatchThisVar(mysLine, mynLineNr, mysVarname):
    '''\
    Check line against lineregex of var.
    Return tuple (matchobject, line, varname).

    Returns None when the variable's lineregex does not match the line.
    '''
    sPattern = g.dVars[mysVarname]["lineregex"]
    oHit = re.search(sPattern, mysLine)
    NTRC.tracef(
        5, "MTLN",
        "proc MatchLine try regex|%s| var|%s| nr|%s| line|%s| match|%s|" %
        (sPattern, mysVarname, mynLineNr, mysLine, oHit))
    # Guard clause: no match, nothing to report.
    if not oHit:
        return None
    NTRC.tracef(
        3, "LINE", "proc MatchLine found line|%s|=|%s| var|%s| regex|%s|" %
        (mynLineNr, mysLine, mysVarname, sPattern))
    return (oHit, mysLine, mysVarname)
def mAddDocument(self, mysDocID, mysClientID):
    '''
    Find a shelf with room for the doc, or create one.
    Put the doc on the shelf, decrement the remaining space.

    Returns "serverID+shelfID+docID" on success, or False when the
    server is dead and refuses the document.
    '''
    # If the server is already dead, do not accept any documents.
    if not self.bDead:
        cDoc = G.dID2Document[mysDocID]
        nSize = cDoc.nSize
        # Find a shelf with sufficient empty space and place the doc there.
        cShelf = None
        for sShelfID in self.lShelfIDs:
            cShelf = G.dID2Shelf[sShelfID]
            bResult = cShelf.mAcceptDocument(mysDocID, nSize, mysClientID)
            if bResult:
                break       # True = doc has been stored
            else:
                continue    # False = no, try another shelf, if any
        else:
            # If no more shelves, create another and use it.
            sNewShelfID = self.mCreateShelf()
            self.lShelfIDs.append(sNewShelfID)
            cShelf = G.dID2Shelf[sNewShelfID]
            sShelfID = cShelf.ID    # TODO: Why not just use sNewShelfID?
            # CONSISTENCY FIX: renamed "result" to "bResult" to match the
            # loop above.  NOTE(review): the accept result of the brand-new
            # shelf is not checked; presumably a fresh shelf always has
            # room for one doc -- confirm.
            bResult = cShelf.mAcceptDocument(mysDocID, nSize, mysClientID)
        # Record that the doc has been stored on this server.
        self.lDocIDsComplete.append(mysDocID)
        self.bInUse = True
        self.lDocIDs.append(mysDocID)
        self.dDocIDs[mysDocID] = mysClientID
        NTRC.tracef(
            3, "SERV", "proc mAddDocument serv|%s| id|%s| "
            "docid|%s| size|%s| assigned to shelfid|%s| remaining|%s|" %
            (self.sName, self.ID, mysDocID, cDoc.nSize, sShelfID,
             cShelf.nFreeSpace))
        return self.ID + "+" + sShelfID + "+" + mysDocID
    else:
        NTRC.ntracef(
            3, "SERV", "proc mAddDocument1 dead server|%s| do not "
            "add doc|%s| for client|%s|" %
            (self.ID, mysDocID, mysClientID))
        return False
def mAddDocument(self, mysDocID, mysClientID):
    '''
    Add a document to this shelf and record some information in
    the document itself.

    Creates a CCopy of the document, places it at the next block
    past the high-water mark (space is never reused -- see BZZZT),
    and returns "serverID+shelfID+docID+copyID".
    '''
    self.lDocIDs.append(mysDocID)
    self.lDocIDsComplete.append(mysDocID)
    self.lClientIDs.append(mysClientID)
    cDoc = G.dID2Document[mysDocID]
    nSize = cDoc.nSize
    # Make a copy of the document and shelve that.
    cCopy = CCopy(mysDocID, mysClientID, self.sServerID)
    sCopyID = cCopy.ID
    NTRC.tracef(
        3, "SHLF", "proc mAddDocument made copy|%s| of doc|%s| "
        "from client|%s|" % (sCopyID, mysDocID, mysClientID))
    # Where does document go on this shelf.  Closed interval [Begin,End].
    #nBlkBegin = self.nCapacity - self.nFreeSpace
    # BZZZT: Never reuse space.  Any empty space in the area that
    #  *used* to be occupied by documents has already been damaged
    #  and destroyed a document.  Do not reuse the space.
    # Yeah, I know it's all just hypothetical, but why not.
    nBlkBegin = self.nHiWater + 1
    self.nFreeSpace -= nSize
    nBlkEnd = nBlkBegin + nSize - 1
    if nBlkEnd > self.nHiWater:
        self.nHiWater = nBlkEnd     # Last block used.
    #sShelfID = self.ID
    #sServerID = self.sServerID
    cCopy.mShelveCopy(self.sServerID, self.ID, nBlkBegin, nBlkEnd)
    self.lCopyIDs.append(sCopyID)
    self.lCopyIDsComplete.append(sCopyID)
    # lCopyTops records each copy's last block for victim selection.
    self.lCopyTops.append(nBlkEnd)
    cDoc.mCopyPlacedOnServer(sCopyID, self.sServerID)
    NTRC.tracef(
        5, "SHLF", "proc mAddDocument add doc|%s| to shelf|%s| "
        "size|%d| remaining|%d|" %
        (mysDocID, self.ID, nSize, self.nFreeSpace))
    return self.sServerID + "+" + self.ID + "+" + mysDocID + "+" + sCopyID
def mAuditCollection(self, mynCycleInterval, mynSegments, mysCollectionID,
        myeCallerSyncEvent):
    '''\
    SimPy generator to audit an entire collection.
    Divide the collection into segments and schedule audits
    for each segment in turn.

    Signals myeCallerSyncEvent (with the cycle number) when the
    whole cycle is complete.
    '''
    fTimeCycleBegin = G.env.now
    lg.logInfo("AUDIT2","begin colln t|%10.3f| auditid|%s| cycle|%s| cli|%s| coll|%s|" % (G.env.now,self.ID,self.nNumberOfCycles,self.sClientID,self.sCollectionID))
    for iThisSegment in range(mynSegments):
        tSegmentStartTime = G.env.now
        nSegmentInterval = self.mCalcSegmentInterval(mynCycleInterval,
            mynSegments)
        bLastSegment = (iThisSegment == mynSegments-1)
        self.lDocsThisSegment = self.mIdentifySegment(mysCollectionID,
            mynSegments, iThisSegment)
        # Spawn the segment audit as its own process and wait for its
        # completion event.
        eSyncEvent = G.env.event()
        G.env.process(
            self.mAuditSegment(iThisSegment, self.lDocsThisSegment,
                mysCollectionID, eSyncEvent))
        # Wait for completion of segment and its allotted time.
        yield eSyncEvent
        # Then idle out the remainder of the segment's time slot.
        tNextSegmentStartTime = tSegmentStartTime + nSegmentInterval
        NTRC.tracef(3, "AUD2", "proc AuditCollection1 now|%s| tstart|%s| "
            "tnext|%s| tinterval|%s| blastseg|%s|" %
            (G.env.now, tSegmentStartTime, tNextSegmentStartTime,
            nSegmentInterval, bLastSegment))
        yield G.env.timeout(tNextSegmentStartTime - G.env.now)
    fTimeCycleEnd = G.env.now
    self.fTimeCycleLength = fTimeCycleEnd - fTimeCycleBegin
    lg.logInfo("AUDIT2", "end colln   t|%10.3f| auditid|%s| cycle|%s| "
        "cli|%s| coll|%s| repairs|%d| total|%d| perms|%d| "
        "majority|%s| minority|%d| duration|%9.3f|" %
        (G.env.now, self.ID, self.nNumberOfCycles, self.sClientID,
        self.sCollectionID, self.nRepairsThisCycle, self.nRepairsTotal,
        self.nPermanentLosses, self.nRepairsMajority,
        self.nRepairsMinority, self.fTimeCycleLength))
    # Tell the caller that we finished.
    myeCallerSyncEvent.succeed(value=self.nNumberOfCycles)
def mGlitchHappens(self, myfNow):
    '''Process one glitch occurrence on this shelf at time myfNow.

    A 100% glitch (or an already-dead server) kills the whole server;
    otherwise the impact is injected as a decaying lifetime reduction.
    Returns (glitch count, shelf ID).
    '''
    self.bGlitchActive = True
    self.nGlitches += 1
    G.nGlitchesTotal += 1
    lg.logInfo(
        "LIFETIME", "glitch     t|%6.0f| on shelf|%s| num|%s| "
        "impactpct|%d| decayhalflife|%d| span|%d| maxlife|%d| gtotal|%s|" %
        (myfNow, self.sShelfID, self.nGlitches, self.nImpactReductionPct,
         self.nGlitchDecayHalflife, self.nGlitchSpan, self.nGlitchMaxlife,
         G.nGlitchesTotal))
    self.fGlitchBegin = float(G.env.now)
    NTRC.tracef(
        3, "LIFE", "proc happens1 t|%.3f| shelf|%s| num|%s| impact|%d| "
        "decayhalflife|%d| span|%d| maxlife|%d|" %
        (myfNow, self.sShelfID, self.nGlitches, self.nImpactReductionPct,
         self.nGlitchDecayHalflife, self.nGlitchSpan,
         self.nGlitchMaxlife))
    '''
    If this is a 100% glitch:
    - Declare server, not just shelf, to be dead.
    - Auditor will eventually discover the problem and call client
      to inform that server is dead.
    '''
    sServerID = self.cShelf.sServerID
    if G.dID2Server[sServerID].bDead or self.nImpactReductionPct == 100:
        # Total loss: kill the shelf and the whole server.
        self.cShelf.bAlive = False
        #sServerID = self.cShelf.sServerID
        cServer = G.dID2Server[sServerID]
        NTRC.ntracef(
            3, "LIFE", "proc happens2 glitch 100pct or server dead "
            "id|%s| shelf|%s| svr|%s|" %
            (self.ID, self.cShelf.ID, sServerID))
        cServer.mServerDies()
        NTRC.ntracef(
            3, "LIFE", "proc happens3 life|%s| killed server |%s|" %
            (self.ID, sServerID))
        lg.logInfo(
            "LIFETIME", "100pct glitch on shelf |%s| "
            "of server|%s| - all docs lost" % (self.sShelfID, sServerID))
    else:
        # Partial glitch: inject a decaying lifetime reduction.
        self.mInjectError(self.nImpactReductionPct,
            self.nGlitchDecayHalflife, self.nGlitchMaxlife)
    return (self.nGlitches, self.sShelfID)
def mAddDocument(self, mysDocID, mysClientID):
    '''
    Add a document to this shelf and record some information in
    the document itself.

    A CCopy is created for the doc and placed just past the shelf's
    high-water mark; freed space is never reused (see BZZZT).
    Returns "serverID+shelfID+docID+copyID".
    '''
    self.lDocIDs.append(mysDocID)
    self.lDocIDsComplete.append(mysDocID)
    self.lClientIDs.append(mysClientID)
    cDoc = G.dID2Document[mysDocID]
    nSize = cDoc.nSize
    # Make a copy of the document and shelve that.
    cCopy = CCopy(mysDocID, mysClientID, self.sServerID)
    sCopyID = cCopy.ID
    NTRC.tracef(3,"SHLF","proc mAddDocument made copy|%s| of doc|%s| "
        "from client|%s|" % (sCopyID, mysDocID, mysClientID))
    # Where does document go on this shelf.  Closed interval [Begin,End].
    #nBlkBegin = self.nCapacity - self.nFreeSpace
    # BZZZT: Never reuse space.  Any empty space in the area that
    #  *used* to be occupied by documents has already been damaged
    #  and destroyed a document.  Do not reuse the space.
    # Yeah, I know it's all just hypothetical, but why not.
    nBlkBegin = self.nHiWater + 1
    self.nFreeSpace -= nSize
    nBlkEnd = nBlkBegin + nSize - 1
    if nBlkEnd > self.nHiWater:
        self.nHiWater = nBlkEnd     # Last block used.
    #sShelfID = self.ID
    #sServerID = self.sServerID
    cCopy.mShelveCopy(self.sServerID, self.ID, nBlkBegin, nBlkEnd)
    self.lCopyIDs.append(sCopyID)
    self.lCopyIDsComplete.append(sCopyID)
    # Record each copy's top block for later victim selection.
    self.lCopyTops.append(nBlkEnd)
    cDoc.mCopyPlacedOnServer(sCopyID, self.sServerID)
    NTRC.tracef(5,"SHLF","proc mAddDocument add doc|%s| to shelf|%s| "
        "size|%d| remaining|%d|" % (mysDocID,self.ID,nSize,self.nFreeSpace))
    return self.sServerID+"+"+self.ID+"+"+mysDocID+"+"+sCopyID
def main(mysInputFilename):
    '''
    Map each data line of a wide CSV file into a reduced-width output
    template built from g.lCoreColumns, and print the results.

    The first line of the file must be the header.  Any output line
    containing "nolinefound" is counted as an error, and the offending
    line numbers are reported at the end.
    '''
    # (Removed a leftover dead "pass" statement that preceded the body.)
    # Create output template.
    lTemplate = map(lambda field: ("{" + field + "}"), g.lCoreColumns)
    sTemplate = " ".join(lTemplate)
    # Process file.
    with open(mysInputFilename, "r") as fhIn:
        nErrors = 0
        lErrors = []
        oReader = csv.reader(fhIn, delimiter=g.sSeparator)
        # First line better be the header.
        lHeader = next(oReader)
        NTRC.tracef(3, "NARO", "proc lHeader|%s|" % (lHeader))
        # For each data line, create dict of values and map them into
        #  the reduced-width output template.
        print(g.sCoreColumns)
        nLine = 1       # Count the header line as 1.
        for lValues in oReader:
            NTRC.tracef(3, "NARO", "proc lValues|%s|" % (lValues))
            dValues = dict(zip(lHeader, lValues))
            NTRC.tracef(3, "NARO", "proc dValues|%s|" % (dValues))
            sOut = sTemplate.format(**dValues)
            nLine += 1
            print(sOut)
            # Track which line numbers produced missing data.
            if "nolinefound" in sOut:
                nErrors += 1
                lErrors.append(nLine)
    if nErrors > 0:
        print("#ERROR - MISSING DATA nolinefound at %s" % (lErrors))
def mAge_shelf(self, mynLifeParam):
    ''' An entire shelf fails.  Remove all the docs it contained.
        Eventually, this will trigger a repair event and make the
        collection more vulnerable during the repair.

        SimPy generator: sleeps an exponential lifetime drawn from
        mynLifeParam, then kills the shelf and destroys every copy.
    '''
    fShelfLife = util.makeexpo(mynLifeParam)
    lg.logInfo(
        "SERVER", "mAge_shelf set lifetime time|%6.0f| shelf|%s| "
        "next lifetime|%.3f|khr" % (G.env.now, self.ID, fShelfLife))
    NTRC.tracef(
        3, "SHLF", "proc mAge_shelf time|%6.0f| shelf|%s| "
        "next lifetime|%.3f|khr" % (G.env.now, self.ID, fShelfLife))
    yield G.env.timeout(fShelfLife)
    # S H E L F  F A I L S
    G.nTimeLastEvent = G.env.now
    self.bAlive = False     # Shelf can no longer be used to store docs.
    NTRC.tracef(
        3, "SHLF", "proc mAge_shelf time|%d| shelf|%s| shelf_error" %
        (G.env.now, self.ID))
    lg.logInfo(
        "SERVER", "storage shelf failed time|%6.0f| server|%s| "
        "shelf|%s| lost |%d| docs" %
        (G.env.now, self.sServerID, self.ID, len(self.lCopyIDs)))
    # This whole shelf is a goner.  Kill it.
    NTRC.tracef(
        5, "SHLF", "proc mAge_shelf kill contents ldocs|%s| "
        "lcopies|%s|" % (self.lDocIDs, self.lCopyIDs))
    # Note that we have to copy the list before modifying it and
    #  iterate over the copy of the list.
    # Standard problem with updating an iterable inside the for loop.
    templCopyIDs = copy.deepcopy(self.lCopyIDs)
    for sCopyID in templCopyIDs:
        sDocID = G.dID2Copy[sCopyID].sDocID
        self.mDestroyCopy(sCopyID)
    #        G.dID2Server[self.sServerID].mDestroyDocument(sDocID,self.ID)
        # BUGFIX: was "sDocId" -- an undefined name (NameError at
        # runtime); the local variable is "sDocID".
        G.dID2Server[self.sServerID].mDestroyCopy(sCopyID, sDocID, self.ID)
        self.mReportDocumentLost(sDocID)
    NTRC.tracef(
        3, "FAIL", "proc t|%d| shelf failure server|%s| qual|%d| "
        "shelf|%s| docs|%d|" %
        (G.env.now, self.sServerID,
         G.dID2Server[self.sServerID].nQual, self.ID, len(templCopyIDs)))
def mAge_shelf(self, mynLifeParam):
    ''' An entire shelf fails.  Remove all the docs it contained.
        Eventually, this will trigger a repair event and make the
        collection more vulnerable during the repair.

        SimPy generator: sleeps an exponential lifetime, then marks
        the shelf dead and destroys all copies it held.
    '''
    fShelfLife = util.makeexpo(mynLifeParam)
    lg.logInfo("SERVER", "mAge_shelf set lifetime time|%6.0f| shelf|%s| "
        "next lifetime|%.3f|khr" % (G.env.now,self.ID,fShelfLife))
    NTRC.tracef(3, "SHLF", "proc mAge_shelf time|%6.0f| shelf|%s| "
        "next lifetime|%.3f|khr" % (G.env.now,self.ID,fShelfLife))
    yield G.env.timeout(fShelfLife)
    # S H E L F  F A I L S
    G.nTimeLastEvent = G.env.now
    self.bAlive = False     # Shelf can no longer be used to store docs.
    NTRC.tracef(3, "SHLF", "proc mAge_shelf time|%d| shelf|%s| shelf_error" %
        (G.env.now,self.ID))
    lg.logInfo("SERVER", "storage shelf failed time|%6.0f| server|%s| "
        "shelf|%s| lost |%d| docs" %
        (G.env.now,self.sServerID,self.ID,len(self.lCopyIDs)))
    # This whole shelf is a goner.  Kill it.
    NTRC.tracef(5, "SHLF", "proc mAge_shelf kill contents ldocs|%s| "
        "lcopies|%s|" % (self.lDocIDs,self.lCopyIDs))
    # Note that we have to copy the list before modifying it and
    #  iterate over the copy of the list.
    # Standard problem with updating an iterable inside the for loop.
    templCopyIDs = copy.deepcopy(self.lCopyIDs)
    for sCopyID in templCopyIDs:
        sDocID = G.dID2Copy[sCopyID].sDocID
        self.mDestroyCopy(sCopyID)
    #        G.dID2Server[self.sServerID].mDestroyDocument(sDocID,self.ID)
        # BUGFIX: was "sDocId" (undefined name -> NameError); the local
        # variable defined above is "sDocID".
        G.dID2Server[self.sServerID].mDestroyCopy(sCopyID,sDocID,self.ID)
        self.mReportDocumentLost(sDocID)
    NTRC.tracef(3, "FAIL", "proc t|%d| shelf failure server|%s| qual|%d| "
        "shelf|%s| docs|%d|" %
        (G.env.now, self.sServerID,
        G.dID2Server[self.sServerID].nQual, self.ID,len(templCopyIDs)))
def logSetConfig(mysLogLevel, mysLogFile):
    '''Configure the module logger: severity level and output channel.

    mysLogLevel: one of NOTSET CRITICAL ERROR WARNING INFO DEBUG
                 (case-insensitive); anything else falls back to NOTSET.
    mysLogFile:  filename for a FileHandler, or one of "", " ", "-",
                 NONE, CONSOLE, STDOUT (case-insensitive) to log to the
                 console with a StreamHandler.
    Returns the configured logger.
    '''
    sLogLevel = mysLogLevel.upper()
    if sLogLevel not in 'NOTSET CRITICAL ERROR WARNING INFO DEBUG'.split():
        NTRC.tracef(0, "LGOU",
                    "ERROR unrecognized logging level|%s|" % (mysLogLevel))
        sLogLevel = "NOTSET"
    # Set the logging level for this session.
    NTRC.tracef(3, "LGOU", "proc sLogLevel|%s|" % (sLogLevel))
    logger.setLevel(sLogLevel.upper())
    '''
    Set the output file for logging.  Either to a filename in LOG_FILE
    param or environ variable, or to the console using StreamHandler.
    '''
    # Console-meaning sentinel values route output to a StreamHandler;
    # any other non-sentinel string is treated as a filename.
    bToConsole = (mysLogFile in ("", " ", "-")
                  or mysLogFile.upper() in ("NONE", "CONSOLE", "STDOUT"))
    if bToConsole:
        channel = logging.StreamHandler()
    else:
        channel = logging.FileHandler(mysLogFile)
    NTRC.tracef(3, "LGOU",
                "proc set log handler mysLogFile|%s|" % (mysLogFile))
    '''
    Adjust the format of log output to match the time stamps
    we have used in TRACE forever.
    '''
    # Create formatter instance, attach it to the channel, and attach
    # the channel to the logger.
    formatter = logging.Formatter(
        fmt='%(asctime)s %(name)s %(levelname)s - %(message)s',
        datefmt='%Y%m%d_%H%M%S')
    channel.setFormatter(formatter)
    logger.addHandler(channel)
    return logger
def logSetConfig(mysLogLevel,mysLogFile):
    '''Configure the module logger.

    mysLogLevel: NOTSET/CRITICAL/ERROR/WARNING/INFO/DEBUG
                 (case-insensitive); unrecognized -> NOTSET.
    mysLogFile:  filename for FileHandler, or ""/" "/"-"/NONE/CONSOLE/
                 STDOUT (case-insensitive) for console StreamHandler.
    Returns the configured logger.
    '''
    lLogLevels = 'NOTSET CRITICAL ERROR WARNING INFO DEBUG'.split()
    sLogLevel = mysLogLevel.upper()
    if sLogLevel not in lLogLevels:
        NTRC.tracef(0,"LGOU","ERROR unrecognized logging level|%s|"
            % (mysLogLevel))
        sLogLevel = "NOTSET"
    # Set the logging level for this session.
    NTRC.tracef(3,"LGOU","proc sLogLevel|%s|"%(sLogLevel))
    logger.setLevel(sLogLevel.upper())
    '''
    Set the output file for logging.  Either to a filename in LOG_FILE
    param or environ variable, or to the console using StreamHandler.
    '''
    # Any value other than the console sentinels is taken as a filename.
    if mysLogFile != "" \
        and mysLogFile != " " \
        and mysLogFile != "-" \
        and mysLogFile.upper() != "NONE" \
        and mysLogFile.upper() != "CONSOLE" \
        and mysLogFile.upper() != "STDOUT" :
        channel = logging.FileHandler(mysLogFile)
    else:
        channel = logging.StreamHandler()
    NTRC.tracef(3,"LGOU","proc set log handler mysLogFile|%s|"
        % (mysLogFile))
    '''
    Adjust the format of log output to match the time stamps
    we have used in TRACE forever.
    '''
    # Create formatter instance.
    formatter = logging.Formatter(
        fmt='%(asctime)s %(name)s %(levelname)s - %(message)s',
        datefmt='%Y%m%d_%H%M%S')
    # Add formatter to the output channel.
    channel.setFormatter(formatter)
    # Finally, add the channel handler to the logger.
    logger.addHandler(channel)
    return logger
def main():
    '''
    Process:
    - Parse the CLI command into g.various data items.
    - Validate user-supplied directories; get environment variables.
    - Query the searchspace for the stream of instructions
    - For each instruction from database selection, get dict for line
    - Using dict args, construct plausible command lines, into file
    - Check to see that there aren't too many similar processes
      already running; if too many, then wait.
    - Launch ListActor process to execute commands.
    - Wait a polite interval before launching another.
    '''
    NTRC.ntracef(0, "MAIN", "Begin.")
    NTRC.ntracef(0, "MAIN", "TRACE  traceproduction|%s|" % NTRC.isProduction())

    # Predicate handed to the worker machinery: end when g.bLast is set.
    def fnbQEnd():
        return g.bLast

    sBrokerCommand = fnsReconstituteCommand(sys.argv)
    fnbMaybeLogCommand(sBrokerCommand)
    NTRC.ntracef(0, "MAIN", "command=|%s|" % (sBrokerCommand.rstrip()))

    # Get args from CLI and put them into the global data
    dCliDict = brokercli.fndCliParse("")
    # Carefully insert any new CLI values into the Global object.
    dCliDictClean = {
        k: util.fnIntPlease(v)
        for k, v in dCliDict.items() if v is not None
    }
    g.__dict__.update(dCliDictClean)

    # Validate that the user-specified directories exist.
    if not fnbValidateDir(g.sFamilyDir):
        raise ValueError("FamilyDir \"%s\" not found" % (g.sFamilyDir))
    if not fnbValidateDir("%s/%s" % (g.sFamilyDir, g.sSpecificDir)):
        raise ValueError("SpecificDir \"%s\" not found" % (g.sSpecificDir))

    # Get command templates from external file.
    fnGetCommandTemplates(g.sCommandListFilename)

    # Construct database query for this invocation.
    g.cFmt = brokerformat.CFormat()
    dQuery = g.cFmt.fndFormatQuery(dCliDict, g)

    # Look for overriding environment variables
    fnvGetEnvironmentOverrides()

    # Open the database to keep "done" records,
    #  and delete moldy, old in-progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(
        g.sSearchDbMongoName,
        g.sSearchDbProgressCollectionName,
        g.sSearchDbDoneCollectionName)
    g.mdb.fnvDeleteProgressCollection()

    # Get the set of instructions for today from database.
    NTRC.tracef(
        0, "MAIN", "proc querydict2|%s|" %
        (list(util.fngSortDictItemsByKeys(dQuery))))
    itAllInstructions = searchspace.fndgGetSearchSpace(g.sInsDir, g.sInsTyp,
        dQuery)

    # Start the start-end threads.
    nb.fntRunEverything(g, g.qInstructions, fnbQEnd, g.nCoreTimer,
        g.nStuckLimit)

    # If this wasn't just a listonly run, do all the cases.
    # NOTE(review): the full-instruction trace fires on the NON-listonly
    # branch; looks inverted at first glance -- confirm intent.
    if not g.sListOnly.startswith("Y"):
        NTRC.ntracef(3, "MAIN", "proc all instr|%s|" % (g.lGiantInstr))
    else:
        NTRC.ntracef(0, "MAIN", "Listonly.")
    nRuns = fnnProcessAllInstructions(itAllInstructions)

    NTRC.ntracef(0, "MAIN", "End queued all runs ncases|%s|" % (g.nCases, ))
def fnldParseInput(mysFilename):
    '''
    Return tuple containing
    - the output template string,
    - a list, one item per line, of dicts of column args from the csv
      that contain instructions for getting variable values from lines.
    Beware duck-type integers that become strings.

    Format of csv lines:
        varname,regex to find line,split word number,regex to strip out value

    instruction file format:
        ##becomes comment in output
        ###send out this string as header for the output, no hashes
        =outputformat
        format string
        =variables
        varname,lineregex,wordnumber,valueregex (header)
        (lines of csv data)
    '''
    dParams = dict()
    # BUGFIX: encoding was "'utf-8" (stray apostrophe inside the string),
    # which raises LookupError: unknown encoding at open() time.
    with open(mysFilename, "r", encoding="utf-8") as fhInfile:
        # Remove comments.
        lLines = list(
            filter(
                lambda sLine: not re.match("^ *#[^#]", sLine) and not re.match(
                    "^ *$", sLine.rstrip()), fhInfile))
        # Get the output template.  It may be longer than one line.
        lTemplate = fnlLinesInRange(lLines, "^=template", "^=variables")
        lTemplate = list(
            map(
                lambda sLine: sLine.rstrip().replace("###", "").replace(
                    "##", "#"), lTemplate))
        NTRC.tracef(3, "INPT", "proc ParseInput1 template|%s|" % (lTemplate))
        # Fix the separator in the template according to the user spec.
        lAllTemplateNames = [
            lTemplateLine.split() for lTemplateLine in lTemplate
        ]
        lNewTemplate = [
            g.sSeparator.join(lTemplateNamesOneLine)
            for lTemplateNamesOneLine in lAllTemplateNames
        ]
        NTRC.tracef(3, "INPT", "proc ParseInput2 template|%s|" %
            (lNewTemplate))
        # Now get the CSV args into a dictionary of dictionaries.
        lVarLines = fnlLinesInRange(lLines, "^=variables",
                                    "^=thiswillnotbefound")
        lRowDicts = csv.DictReader(lVarLines)
        NTRC.tracef(5, "INPT", "proc ParseInput3 lRowDicts all|%s|" %
            (lRowDicts))
        # Key each csv row dict by its varname.
        dParams = dict(
            map(
                lambda dRowDict: \
                (dRowDict["varname"],dRowDict) \
                , lRowDicts
                ))
    return (lNewTemplate, dParams)
def mAge_sector(self):
    ''' A sector in the shelf fails.  This corrupts a document.
        For the moment, assume that it destroys the document.  
        Eventually, it will have a probability of destroying the 
        document depending on the portion of the document 
        corrupted and the sensitivity of the document to corruption
        (e.g., compressed or encrypted), or the failure hits an
        encryption or license key.  

        SimPy generator process: loops forever while the shelf is alive,
        sleeping an exponentially-distributed interval between sector
        errors, then destroying the victim copy (if the error landed on
        one) or counting an empty-space hit.
    '''
    # If the shelf has been emptied by a shelf failure, stop
    # caring about sector failures.
    while self.bAlive:
        # Sector lifetime depends on shelf lifetime and glitch age.
        fNow = G.env.now
        cLifetime = G.dID2Lifetime[self.sSectorLifetimeID]
        fLifetimeNow = cLifetime.mfCalcCurrentSectorLifetime(fNow)
        # Next failure interval drawn from exponential distribution
        # with the current (possibly glitch-shortened) mean lifetime.
        fSectorLifeInterval = util.makeexpo(fLifetimeNow)
        NTRC.tracef(3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
            "next interval|%.3f|hr from life rate|%.3f|hr" 
            % (G.env.now, self.ID, fSectorLifeInterval, fLifetimeNow))
        yield G.env.timeout(fSectorLifeInterval)

        # S E C T O R  E R R O R
        self.nSectorHits += 1
        G.nTimeLastEvent = G.env.now
        NTRC.tracef(3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
            "Sector_error hits|%d| emptyhits|%d|" 
            % (G.env.now, self.ID, self.nSectorHits, self.nEmptySectorHits))

        # Select a victim Document, probability proportional to size.
        # Small error, size=1.  What doc dies as a result?
        sCopyVictimID = self.mSelectVictimCopy(mynErrorSize=1)

        # New version: compress strings of consecutive misses into single line.
        # Normally we log one line per error regardless of whether it hits or
        #  misses a document.  That results in hideously long log files for
        #  sparse storage structures, like small docs on large shelf.
        # Count consecutive misses, and issue one summary line before the
        #  next hit.
        # CANDIDATE FOR REFACTORING
        if sCopyVictimID:
            # Hidden error in victim doc.
            # Destroy copy on this shelf.
            cCopy = G.dID2Copy[sCopyVictimID]
            sDocID = cCopy.mGetDocID()
            self.mDestroyCopy(sCopyVictimID)
            # Log the summary line if we just ended a string of misses
            if self.nConsecutiveMisses > 0:
                lg.logInfo("SERVER", "small error t|%6.0f| svr|%s| "
                    "shelf|%s| consecutive misses|%d|" 
                    % (G.env.now, self.sServerID, self.ID,
                    self.nConsecutiveMisses))
            # Reset the miss counter; harmless no-op if it was already zero.
            self.nConsecutiveMisses = 0
            lg.logInfo("SERVER", "small error t|%6.0f| svr|%s| "
                "shelf|%s| hidden failure in copy|%s| doc|%s|" 
                % (G.env.now,self.sServerID,self.ID,sCopyVictimID,sDocID))
            NTRC.tracef(3, "FAIL", "proc t|%d| sector failure server|%s| "
                "qual|%d| shelf|%s| doc|%s| copy|%s|" 
                % (G.env.now, self.sServerID,
                G.dID2Server[self.sServerID].nQual, self.ID,
                sDocID, sCopyVictimID))
        else:
            # No victim, hit empty space.
            self.nEmptySectorHits += 1
            NTRC.tracef(3, "SHLF", "proc mAge_sector shelf|%s| "
                "sector error fell in empty space" % (self.ID))
            # NOTE(review): on the first miss of a run this logs
            #  "hidden failure in copy|None|" (sCopyVictimID is None here);
            #  looks copy-pasted from the hit branch -- confirm intent.
            if self.nConsecutiveMisses == 0:
                lg.logInfo("SERVER", "small error t|%6.0f| svr|%s| "
                    "shelf|%s| hidden failure in copy|%s|" 
                    % (G.env.now, self.sServerID, self.ID, sCopyVictimID))
            self.nConsecutiveMisses += 1
            NTRC.tracef(3, "FAIL", "proc t|%d| sector failure server|%s| "
                "qual|%d| shelf|%s| copy|%s|" 
                % (G.env.now, self.sServerID,
                G.dID2Server[self.sServerID].nQual, self.ID, sCopyVictimID))

        # Initiate a repair of the dead document.
        # BZZZT NYI: currently all such failures are silent, so they are
        #  not detected by the client until audited (or end of run).

    # Shelf is no longer alive, so we do not notice or schedule
    #  future sector errors.  Log the event.
    lg.logInfo("SHELF ", "t|%6.0f| dead shelf|%s| of svr|%s|, "
        "no future errors" 
        % (G.env.now, self.ID, self.sServerID))
def mSelectVictimCopy(self, mynErrorSize):
    ''' Which doc copy on this shelf, if any, was hit by this error?
        Throw a uniform dart at all the docs on the shelf, see 
        which one gets hit, or dart falls into empty space.  Doc size
        counts.  

        Returns the victim copy ID, or None if the dart fell in empty
        space or on a slot whose copy was already destroyed.
        NOTE(review): integer-truncating "/" implies Python 2 semantics;
        under Python 3 nDist/nLoc would become floats -- confirm runtime.
    '''
    # Dart position in [1, capacity + errorsize - 1]; doc size weights
    # the chance of being hit because each copy spans its block range.
    nRandomSpot = util.makeunif(1, self.nCapacity + mynErrorSize - 1)
    nLoc = 0
    NTRC.tracef(5, "SHLF", "proc SelectVictimCopy0 wherehit spot|%s| "
        "hiwater|%s| shelfid|%s| capacity|%s|" 
        % (nRandomSpot,self.nHiWater,self.ID,self.nCapacity))
    # First, check to see if the failure is maybe in an occupied region.
    if nRandomSpot <= self.nHiWater:
        # Find the document hit by the error.  May have been hit before, too.
        # New version, vanilla binary search with adjacent interval checking
        #  on list of all locations assigned on this shelf.
        # After you find the location, check to see that it 
        #  is still occupied by live copy.
        nLen = len(self.lCopyIDsComplete)
        nDist = (nLen + 1) / 2      # halving step for the search
        nLoc = nDist                # start probing at the midpoint
        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy0 searchsetup len|%s| "
            "loc|%s| dist|%s|" % (nLen, nLoc, nDist))
        while 1:
            # Clamp the probe index into [1, nLen-1] so the
            # nLoc-1 / nLoc+1 neighbor accesses stay in range.
            if nLoc <= 0:
                nLoc = 1
            if nLoc >= nLen:
                nLoc = nLen - 1
            # Halve the step each pass; never let it reach zero or
            # the search would stall.
            nDist = (nDist + 1) / 2
            if nDist == 0:
                nDist = 1
            # Interval for slot nLoc: (lCopyTops[nLoc-1], lCopyTops[nLoc]].
            nTop = self.lCopyTops[nLoc]
            nBottom = self.lCopyTops[nLoc-1]
            sCopyID = self.lCopyIDsComplete[nLoc-1]
            sDocID = self.lDocIDsComplete[nLoc-1]
            cCopy = G.dID2Copy[sCopyID]
            if nRandomSpot <= nTop:
                # Lower than top, look down.
                if nRandomSpot >= nBottom:
                    # Found to left of nLoc.
                    NTRC.tracef(5, "SHLF", "proc SelectVictimCopy5D "
                        "found victim id|%s| at spot|%s| in[%s,%s]| "
                        "doc|%s|" 
                        % (sCopyID, nRandomSpot, nBottom, nTop, sDocID))
                    # Is this slot still occupied by a live copy?
                    if sCopyID in self.lCopyIDs:
                        sVictimID = sCopyID
                        NTRC.tracef(3, "SHLF", "proc mSelectVictimCopy "
                            "NEWD end shelf|%s| spot|%d| hits doc|%s| "
                            "placed[%d,%d] size|%d| outof|%d|" 
                            % (self.ID, nRandomSpot, sVictimID,
                            cCopy.nBlkBegin, cCopy.nBlkEnd,
                            (cCopy.nBlkEnd-cCopy.nBlkBegin+1),
                            self.nCapacity))
                    else:
                        # Slot belonged to a copy already destroyed
                        # by an earlier error: count a repeat hit.
                        sVictimID = None
                        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy2D "
                            "no longer valid copyid|%s| docid|%s|" 
                            % (sCopyID, sDocID))
                        self.nMultipleHits += 1
                    break
                else:
                    # Spot is below this interval: step left.
                    nLoc -= nDist
                    NTRC.tracef(5, "SHLF", "proc SelectVictimCopy3D "
                        "down spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|" 
                        % (nRandomSpot, nBottom, nTop, nLoc, nDist))
            else:
                # Higher than top, look up.
                if nRandomSpot <= self.lCopyTops[nLoc+1]:
                    # Found to right of nLoc.
                    # Reevaluate ids and locations to the next slot
                    #  on the right.
                    # NOTE(review): sCopyID is assigned twice below with
                    #  the same index -- redundant but harmless; kept as-is.
                    sCopyID = self.lCopyIDsComplete[nLoc+1-1]
                    sDocID = self.lDocIDsComplete[nLoc+1-1]
                    cCopy = G.dID2Copy[sCopyID]
                    nBottom = self.lCopyTops[nLoc+1-1]
                    sCopyID = self.lCopyIDsComplete[nLoc+1-1]
                    NTRC.tracef(5, "SHLF", "proc SelectVictimCopy5U "
                        "found victim id|%s| at spot|%s| in[%s,%s]| doc|%s|" 
                        % (sCopyID, nRandomSpot, nBottom, nTop, sDocID))
                    # Is this slot still occupied by a live copy?
                    if sCopyID in self.lCopyIDs:
                        sVictimID = sCopyID
                        NTRC.tracef(3, "SHLF", "proc mSelectVictimCopy NEWU "
                            "end shelf|%s| spot|%d| hits doc|%s| "
                            "placed[%d,%d] size|%d| outof|%d|" 
                            % (self.ID, nRandomSpot, sVictimID,
                            cCopy.nBlkBegin, cCopy.nBlkEnd,
                            (cCopy.nBlkEnd-cCopy.nBlkBegin+1),
                            self.nCapacity))
                    else:
                        sVictimID = None
                        NTRC.tracef(5, "SHLF", "proc SelectVictimCopy2U "
                            "no longer valid copyid|%s| docid|%s|" 
                            % (sCopyID, sDocID))
                        self.nMultipleHits += 1
                    break
                else:
                    # Spot is above the next interval: step right.
                    nLoc += nDist
                    NTRC.tracef(5, "SHLF", "proc SelectVictimCopy3U up "
                        "spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|" 
                        % (nRandomSpot, nBottom, nTop, nLoc, nDist))
    else:
        # Outside hiwater area, just count as a miss.
        NTRC.tracef(3, "SHLF", "proc mSelectVictimCopy shelf|%s| spot|%d| "
            "above hiwater|%s| empty" 
            % (self.ID, nRandomSpot, self.nHiWater))
        sVictimID = None
        self.nHitsAboveHiWater += 1
    return sVictimID
def main():
    '''
    Process: 
    - Parse the CLI command into g.various data items.
    - Validate user-supplied directories; get environment variables.
    - Make queues to send instructions to pool of worker processes.
    - Create pool of worker processes.
    - Query the searchspace for the stream of instructions
    - For each instruction from database selection, get dict for line
    - Using dict args, construct plausible command lines, into file
    - For each instruction, expand to the number of samples (seeds) to use.
    - When we finally have a single instruction to execute, queue that
      to the worker jobs.
    - When all instructions have been queued, close down the worker
      processes.

    Side effects: mutates the module-global g object extensively
    (CLI args, DB handle, job/output queues, worker pool); raises
    ValueError if either user-specified directory is missing.
    '''
    NTRC.ntracef(0, "MAIN", "Begin.")
    NTRC.ntracef(0, "MAIN", "TRACE traceproduction|%s|" % NTRC.isProduction())
    # Reconstruct and optionally log the full command line for audit.
    sBrokerCommand = fnsReconstituteCommand(sys.argv)
    fnbMaybeLogCommand(sBrokerCommand)
    NTRC.ntracef(0, "MAIN", "command=|%s|" % (sBrokerCommand.rstrip()))

    # Get args from CLI and put them into the global data
    dCliDict = brokercli.fndCliParse("")
    # Carefully insert any new CLI values into the Global object.
    # Only non-None values override; ints arrive as strings and are coerced.
    dCliDictClean = {
        k: util.fnIntPlease(v)
        for k, v in dCliDict.items() if v is not None
    }
    g.__dict__.update(dCliDictClean)

    # Validate that the user-specified directories exist.
    if not fnbValidateDir(g.sFamilyDir):
        raise ValueError("FamilyDir \"%s\" not found" % (g.sFamilyDir))
    if not fnbValidateDir("%s/%s" % (g.sFamilyDir, g.sSpecificDir)):
        raise ValueError("SpecificDir \"%s\" not found" % (g.sSpecificDir))

    # Get command templates from external file.
    fnGetCommandTemplates(g.sCommandListFilename)

    # Construct database query for this invocation.
    g.cFmt = brokerformat.CFormat()
    dQuery = g.cFmt.fndFormatQuery(dCliDict, g)

    # Look for overriding environment variables
    fnvGetEnvironmentOverrides()

    # Open the database to keep "done" records,
    #  and delete moldy, old in-progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(
        g.sSearchDbMongoName,
        g.sSearchDbProgressCollectionName,
        g.sSearchDbDoneCollectionName)
    g.mdb.fnvDeleteProgressCollection()

    # Get the set of instructions for today from database.
    NTRC.tracef(
        0, "MAIN", "proc querydict2|%s|" %
        (list(util.fngSortDictItemsByKeys(dQuery))))
    itAllInstructions = searchspace.fndgGetSearchSpace(g.sInsDir, g.sInsTyp,
                                                       dQuery)

    # Start the start-end threads.
    # Define queues.
    # Need a Multiprocessing Manager to own the output queue.  (Do we?) (Yes.)
    mpmgr = mp.Manager()
    g.qJobs = mp.Queue()
    g.qOutput = mpmgr.Queue()
    # Start pool of worker processes.
    g.cWorkersInst = cworkers.CWorkers(nservers=g.nCores,
                                       qinputjobs=g.qJobs,
                                       qoutputdata=g.qOutput)

    # If this wasn't just a listonly run, do all the cases.
    if not g.sListOnly.startswith("Y"):
        NTRC.ntracef(3, "MAIN", "proc all instr|%s|" % (g.lGiantInstr))
    else:
        NTRC.ntracef(0, "MAIN", "Listonly.")
    # Expand and queue every instruction (listonly runs only list them).
    nRuns = fnnProcessAllInstructions(itAllInstructions)
    NTRC.ntracef(0, "MAIN", "End queued all runs ncases|%s|" % (g.nCases, ))
def mAuditSegment(self, mynThisSegment, mylDocs, mysCollectionID,
        myeCallerSyncEvent):
    '''\
    SimPy generator to audit one segment of a collection.
    This does all the work.
    
    This is the single worst, most confusing, most fragile, and most
    awful code in the entire program.  Unfortunately, in Python 2, 
    one cannot yield from a vanilla function, only from a generator, 
    so all that crap, and its convoluted conditional logic, is in here.  
    *This* is the meanest, nastiest, ugliest father-raper of them all.  

    Phases: 0 = age out dead servers; 1 = probe every (server, doc) pair
    and record losses; 2 = classify each loss (majority/minority/permloss);
    3 = repair where copies remain.  Signals completion to the caller via
    myeCallerSyncEvent.succeed().
    '''
    lg.logInfo("AUDIT2", "begin segmt t|%10.3f| auditid|%s| cycle|%s| "
        "seg|%s| cli|%s| coll|%s| ndocs|%s|range %s-%s|" 
        % (G.env.now, self.ID, self.nNumberOfCycles, mynThisSegment,
        self.sClientID, self.sCollectionID, len(mylDocs),
        mylDocs[0], mylDocs[-1]))
    ###seize network resource
    # Seize the network resource so this audit cycle
    #  can use it exclusively.
    # The "with" should take care of releasing it
    cClient = G.dID2Client[self.sClientID]
    with cClient.NetworkBandwidthResource.request() as reqnetwork:
        fNetworkWaitBegin = G.env.now
        ###wait if necessary
        result = yield reqnetwork       # Wait for network to be free.
        fNetworkWaitEnd = G.env.now
        fNetworkWaitTime = fNetworkWaitEnd - fNetworkWaitBegin
        ###log result
        # Log event if we had to wait, or not, for the network to be free.
        lg.logInfo("AUDIT2", "grabnetwork t|%10.3f| auditid|%s| cli|%s| "
            "coll|%s| seg|%s| delay|%9.3f|" 
            % (G.env.now, self.ID, self.sClientID, self.sCollectionID,
            mynThisSegment, fNetworkWaitTime))
        # And restart the duration clock after the unproductive wait.
        fTimeCycleBegin = G.env.now
        # So much for timekeeping.  Now do some actual work.

        # P h a s e  0: Check to see if any servers have died of old age, 
        #  possibly from being weakened by shock.  If so, they get killed
        #  now so that this audit segment will discover the loss.
        nResult = CShock.cmBeforeAudit()

        # P h a s e  1: Check servers for copies of docs, record losses.
        # Docs already permanently lost will not be put on the damaged list.
        self.dlDocsDamagedOnServers = cc.defaultdict(list)
        cCollection = G.dID2Collection[mysCollectionID]
        # foreach server used for this collection
        for sServerID in cCollection.lServerIDs:
            cServer = G.dID2Server[sServerID]
            ###foreach doc
            # foreach doc in this segment
            for sDocID in self.lDocsThisSegment:
                cDoc = G.dID2Document[sDocID]
                # If the doc is still on the server, retrieve it
                #  and spend time doing that.
                # If not, then record that doc damaged on this server.
                fTransferTime = self.mRetrieveDoc(sDocID,sServerID)
                ###if okay
                if fTransferTime:
                    NTRC.tracef(3, "AUD2", "proc AuditSegment3 retrieve "
                        "t|%10.3f| doc|%s| svr|%s| xfrtim|%f|" 
                        % (G.env.now, sDocID, sServerID, fTransferTime))
                    ###yield timeout
                    yield G.env.timeout(fTransferTime)
                else:
                    if self.mIsDocumentLost(sDocID):
                        pass    # Do not complain if doc already known to be lost.
                    else:
                        # If copy is missing here, save server in
                        #  lost-list for doc.
                        self.dlDocsDamagedOnServers[sDocID].append(sServerID)
                        NTRC.tracef(5, "AUD2", "proc AuditSegment2 doc|%s| "
                            "svr|%s| lost on|%s|" 
                            % (sDocID, sServerID,
                            self.dlDocsDamagedOnServers[sDocID]))
                        ###log copy missing on some server
                        lg.logInfo("AUDIT2", "copymissing t|%10.3f| "
                            "doc|%s| svr|%s| aud|%s-c%s-s%s| cli|%s| "
                            "coll|%s|" 
                            % (G.env.now, sDocID, sServerID, self.ID,
                            self.nNumberOfCycles, mynThisSegment,
                            self.sClientID, self.sCollectionID))
            # end foreach doc
        # end foreach server used for collection

        '''NOTE: Phase 2 here can be factored out of this function 
            entirely because it does not yield or otherwise molest 
            the clock.  But refactoring must be done carefully 
            because it consumes and supplies data from/for 
            phases 1 and 3.  
        '''
        # P h a s e  2: Record severity (majority/minority/permanent)
        #  of copy losses.
        # NOTE: This arithmetic seems to be reasonable for all numbers 
        #  greater than two, but one copy remaining out of two is judged 
        #  to be a majority, so a repair from that single remaining copy 
        #  is labeled a majority repair.  Seems kinda wrong.  
        # Would love to split the logic of this routine into separate 
        #  functions; when you're indented seven levels, your logic is, 
        #  um, hard to explain.  But we cannot yield from sub-functions, 
        #  at least not in Python2.  
        nServers = len(cCollection.lServerIDs)
        # NOTE(review): "/" assumes Python 2 integer division (the comment
        #  below says so); under Python 3 nMajority would be a float.
        nMajority = (len(cCollection.lServerIDs)+1) / 2 # recall that
                                                        #  int div truncates
        ###foreach doc on damaged list
        for sDocID in sorted(self.dlDocsDamagedOnServers.keys(),
                key=util.fniNumberFromID):
            ###count docs on all servers
            lDocLostOnServers = self.dlDocsDamagedOnServers[sDocID]
            nCopiesLost = len(lDocLostOnServers)
            nCopiesLeft = nServers - nCopiesLost
            # How many copies left: none, a lot, a few?
            NTRC.tracef(3, "AUD2", "proc AuditSegment1 doc|%s| nsvr|%s| "
                "loston|%s| nleft|%s|" 
                % (sDocID, nServers, lDocLostOnServers, nCopiesLeft))
            ###if doc not lost
            ### assess majority/minority/lost
            if nCopiesLeft == 0:
                # N O N E  remain
                # Report permanent loss, one ping only.
                # Do not double-count docs already lost.  Doc will not
                #  be put onto damaged list if already lost.
                sRepair = "permloss"
                lg.logInfo("AUDIT2", "perm loss t|%10.3f| doc|%s| "
                    "aud|%s-c%s-s%s| cli|%s| coll|%s|" 
                    % (G.env.now, sDocID, self.ID, self.nNumberOfCycles,
                    mynThisSegment, self.sClientID, self.sCollectionID))
                self.mRecordDocumentLost(sDocID)
            else:
                ###doc is repairable; determine majority/minority
                if nCopiesLeft >= nMajority:
                    # M A J O R I T Y  remain
                    sRepair = "majority"
                else:
                    # M I N O R I T Y  remain
                    sRepair = "minority"
                ###log repair type for doc
                lg.logInfo("AUDIT2", "%s rp t|%10.3f| doc|%s| "
                    "aud|%s-c%s-s%s| cli|%s| coll|%s|" 
                    % (sRepair, G.env.now, sDocID, self.ID,
                    self.nNumberOfCycles, mynThisSegment, self.sClientID,
                    self.sCollectionID))

            # P h a s e  3: repair damaged docs, if possible.
            ###foreach server on which doc was damaged
            # Put a copy back on each server where it is missing.
            for sServerID in lDocLostOnServers:
                if nCopiesLeft > 0:
                    ###repair
                    fTransferTime = self.mRepairDoc(sDocID,sServerID)
                    '''\
                    If the repair returns False instead of a time, 
                     then that server is no longer accepting documents.  
                    Remove that server from the list, invalidate all its 
                     copies.  
                    Then tell the client to find a new server and re-place
                     the entire collection.  
                    Schedule this notification to occur at the end of the
                     audit cycle or segment to avoid confusing the 
                     ongoing evaluation.  
                    Auditor informs client: oops, you seem to be missing 
                     a server; and client takes corrective action at that
                     time.  
                    Send collectionID and serverID to clientID.  
                    '''
                    ###if not okay ie server dead
                    # NOTE(review): "== False" also matches a 0.0 transfer
                    #  time (0.0 == False is True in Python) -- confirm
                    #  mRepairDoc can never legitimately return 0.0.
                    if fTransferTime == False:
                        self.stDeadServerIDs.add((sServerID,
                            self.sCollectionID))
                        lg.logInfo("AUDIT2", "dead server t|%10.3f| "
                            "doc|%s| aud|%s| cli|%s| coll|%s| svr|%s|" 
                            % (G.env.now, sDocID, self.ID, self.sClientID,
                            self.sCollectionID, sServerID))
                    else:
                        ###log repair effected
                        NTRC.tracef(3, "AUD2", "proc AuditSegment4 repair "
                            "t|%10.3f| doc|%s| svr|%s| xfrtim|%f| type|%s|" 
                            % (G.env.now, sDocID, sServerID, fTransferTime,
                            sRepair))
                        yield G.env.timeout(float(fTransferTime))
                        lg.logInfo("AUDIT2", "repair doc t|%10.3f| "
                            "doc|%s| aud|%s| cli|%s| coll|%s| svr|%s| "
                            "from %s copies|%d|" 
                            % (G.env.now, sDocID, self.ID, self.sClientID,
                            self.sCollectionID, sServerID, sRepair,
                            nCopiesLeft))
                        ###count repair as type maj/min for audit and doc
                        # If repair succeeded, record and count it.
                        if sRepair == "majority":
                            self.mRecordDocumentMajorityRepair(sDocID)
                        else:
                            self.mRecordDocumentMinorityRepair(sDocID)
            # end foreach server that lost this doc
        # end foreach damaged doc

        lg.logInfo("AUDIT2", "end segmt   t|%10.3f| auditid|%s| "
            "cycle|%s| seg|%s| cli|%s| coll|%s| ndocs|%s|" 
            % (G.env.now, self.ID, self.nNumberOfCycles, mynThisSegment,
            self.sClientID, self.sCollectionID, len(mylDocs)))
        # After all that, tell the caller we finished.
        myeCallerSyncEvent.succeed(value=mynThisSegment)
        lg.logInfo("AUDIT2", "rls network t|%10.3f| auditid|%s| "
            "cli|%s| coll|%s| seg|%s|" 
            % (G.env.now, self.ID, self.sClientID, self.sCollectionID,
            mynThisSegment))
    # end network resource

    # If we saw any dead servers during this segment, inform the clients.
    for (sDeadServerID, sDeadCollectionID) in self.stDeadServerIDs:
        cCollection = G.dID2Collection[self.sCollectionID]
        cClient = G.dID2Client[cCollection.sClientID]
        # NOTE(review): this trace logs sServerID and sDocID, which are
        #  stale loop variables from the phases above (and unbound if those
        #  loops never ran) -- probably meant sDeadServerID; confirm.
        NTRC.ntracef(3, "AUD2", "proc t|%10.3f| inform dead server "
            "auditid|%s| cli|%s| coll|%s| svr|%s| doc|%s|" 
            % (G.env.now, self.ID, self.sClientID, self.sCollectionID,
            sServerID, sDocID))
        cClient.mServerIsDead(sDeadServerID, sDeadCollectionID)
    self.stDeadServerIDs = set()
def main():
    '''
    Process:
    Open the file given on the command line.
    Open the database given on the command line.
    Read the two lines from the file.
    If the sDoneId(=mongoid) already appears in the done collection
     of the database,
    Then end.
    Else dictionary-ify the data (maybe csvreader already did that for us).
         add the dict to the done collection, including the sDoneId field.
    end.

    Returns 0.  Side effects: appends to the giant output file, may
    insert a done record, may delete the input file and its
    in-progress record.
    '''
    NTRC.ntracef(0,"DCLN","datacleanup Begin.")
    # Get args from CLI and put them into the global data
    dCliDict = fndCliParse("")
    # Carefully insert any new CLI values into the Global object.
    dCliDictClean = {k:v for k,v in dCliDict.items() if v is not None}
    g.__dict__.update(dCliDictClean)

    # Get data from the extract file: one line of header, one line of data.
    with open(g.sInputFilename,'r') as fhInput:
        oReader = csv.reader(fhInput, delimiter=g.sSeparator)
        # BUGFIX(portability): oReader.next() is Python-2-only; the
        # next() builtin works on Python 2.6+ and Python 3.
        lHeader = next(oReader)
        lValues = next(oReader)
    NTRC.tracef(3, "DCLN", "proc lHeader|%s|" % (lHeader))
    NTRC.tracef(3, "DCLN", "proc lValues|%s|" % (lValues))
    dValues = dict(zip(lHeader, lValues))
    NTRC.tracef(3, "DCLN", "proc dValues|%s|" % (dValues))

    # Open the SearchDatabase for done and progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(g.sSearchDbMongoName,
                g.sProgressCollectionName,
                g.sDoneCollectionName)
    # Construct database query for this invocation.
    sInstructionId = dValues["mongoid"]
    sLineOut = g.sSeparator.join(lValues)
    NTRC.tracef(0,"DCLN","proc looking for done recd|%s|" % (sInstructionId))

    # If this extract is already stored in the database, don't do it again.
    bIsItDone = g.mdb.fnbIsItDone(sInstructionId)
    if not bIsItDone:
        # If case not already done, add data line to the giant output file.
        # But first, ...
        # If the output file does not exist, or is empty, write the header line
        #  in first before the data line.
        # (If the file does not exist, open mode=a will create an empty one.)
        with open(g.sGiantOutputFilename,'a') as fhOutput:
            if not os.stat(g.sGiantOutputFilename).st_size:
                sHeaderLine = g.sSeparator.join(lHeader)
                fhOutput.write(sHeaderLine + "\n")
                NTRC.tracef(3, "DCLN", "proc wroteheaderline|%s|"
                    % (sHeaderLine))
            fhOutput.write(sLineOut + "\n")
            NTRC.tracef(0, "DCLN", "proc line appended to output "
                "\nsLineOut|%s|" % (sLineOut))
        # Probably record the done record in db.
        if g.sDoNotRecord.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Done not recorded.")
        else:
            dResult = g.mdb.fndInsertDoneRecord(sInstructionId, dValues)
        # Probably delete the extract file.
        if g.sDoNotDelete.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Input file not deleted.")
        else:
            os.remove(g.sInputFilename)
            NTRC.tracef(3,"DCLN", "proc fileremoved|%s|"
                % (g.sInputFilename))
            # And remove its in-progress record from the search db.
            # NOTE(review): progress record is removed only when the input
            #  file is deleted; confirm that is intended when DoNotDelete=Y.
            g.mdb.fndDeleteProgressRecord(sInstructionId)
    else:
        # Duplicate instruction; do not add line to output file.
        NTRC.tracef(0, "DCLN", "proc line NOT appended to output file \n"
            "sLineOut|%s|" % (sLineOut))
    NTRC.ntracef(0,"DCLN","datacleanup End.")
    return 0
def main():
    '''
    Top-level broker driver.

    Sequence:
    - Reconstruct and log the invoking command line.
    - Parse CLI args into the module-global g object.
    - Verify the family and specific directories exist (ValueError if not).
    - Load command templates, build the database query, apply any
      environment-variable overrides.
    - Open the search database and clear stale in-progress records.
    - Stream the instruction set from the searchspace, expand it into
      concrete cases, and (unless this is a listonly run) execute them
      while rate-limiting how many similar processes run at once.
    '''
    NTRC.ntracef(0, "MAIN", "Begin.")
    NTRC.ntracef(0, "MAIN", "TRACE traceproduction|%s|" % NTRC.isProduction())
    sInvocation = fnsReconstituteCommand(sys.argv)
    fnbMaybeLogCommand(sInvocation)
    NTRC.ntracef(0, "MAIN", "command=|%s|" % (sInvocation.rstrip()))

    # Pull CLI arguments, then fold only the supplied (non-None) values
    # into the global object, coercing numeric strings to ints.
    dArgsRaw = brokercli.fndCliParse("")
    dArgsClean = {
        sKey: util.fnIntPlease(xVal)
        for sKey, xVal in dArgsRaw.items()
        if xVal is not None
    }
    g.__dict__.update(dArgsClean)

    # Both user-specified directories must exist before we go any further.
    if not fnbValidateDir(g.sFamilyDir):
        raise ValueError("FamilyDir \"%s\" not found" % (g.sFamilyDir))
    sSpecificPath = "%s/%s" % (g.sFamilyDir, g.sSpecificDir)
    if not fnbValidateDir(sSpecificPath):
        raise ValueError("SpecificDir \"%s\" not found" % (g.sSpecificDir))

    # External command templates, then the DB query for this invocation.
    fnGetCommandTemplates(g.sCommandListFilename)
    g.cFmt = brokerformat.CFormat()
    dDbQuery = g.cFmt.fndFormatQuery(dArgsRaw, g)

    # Environment variables may override anything set so far.
    fnvGetEnvironmentOverrides()

    # Open the "done" database and discard moldy in-progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(
        g.sSearchDbMongoName,
        g.sSearchDbProgressCollectionName,
        g.sSearchDbDoneCollectionName)
    g.mdb.fnvDeleteProgressCollection()

    # Fetch today's instruction stream and expand it into cases.
    NTRC.tracef(0, "MAIN", "proc querydict2|%s|" % (dDbQuery))
    itInstructionStream = searchspace.fndgGetSearchSpace(
        g.sInsDir, g.sInsTyp, dDbQuery)
    nInstructionsSeen = fnnProcessAllInstructions(itInstructionStream)

    # Listonly runs stop after listing; otherwise run every case.
    if not g.sListOnly.startswith("Y"):
        NTRC.ntracef(3, "MAIN", "proc all instr|%s|" % (g.lGiantInstr))
        nCasesDone = nb.fntRunEverything(
            g, iter(g.lGiantInstr), g.nCoreTimer, g.nStuckLimit)
    else:
        nCasesDone = len(g.lGiantInstr)
    NTRC.ntracef(0, "MAIN", "End ncases|%s|" % (nCasesDone,))
def mAuditSegment(self, mynThisSegment, mylDocs, mysCollectionID,
        myeCallerSyncEvent):
    '''\
    SimPy generator to audit one segment of a collection.
    This does all the work.
    
    This is the single worst, most confusing, most fragile, and most
    awful code in the entire program.  Unfortunately, in Python 2, 
    one cannot yield from a vanilla function, only from a generator, 
    so all that crap, and its convoluted conditional logic, is in here.  
    *This* is the meanest, nastiest, ugliest father-raper of them all.  

    Phases: 0 = age out dead servers; 1 = probe every (server, doc) pair
    and record losses; 2 = classify each loss (majority/minority/permloss);
    3 = repair where copies remain.  Signals completion to the caller via
    myeCallerSyncEvent.succeed().
    '''
    lg.logInfo("AUDIT2", "begin segmt t|%10.3f| auditid|%s| cycle|%s| "
        "seg|%s| cli|%s| coll|%s| ndocs|%s|range %s-%s|" 
        % (G.env.now, self.ID, self.nNumberOfCycles, mynThisSegment,
        self.sClientID, self.sCollectionID, len(mylDocs),
        mylDocs[0], mylDocs[-1]))
    ###seize network resource
    # Seize the network resource so this audit cycle
    #  can use it exclusively.
    # The "with" should take care of releasing it
    cClient = G.dID2Client[self.sClientID]
    with cClient.NetworkBandwidthResource.request() as reqnetwork:
        fNetworkWaitBegin = G.env.now
        ###wait if necessary
        result = yield reqnetwork       # Wait for network to be free.
        fNetworkWaitEnd = G.env.now
        fNetworkWaitTime = fNetworkWaitEnd - fNetworkWaitBegin
        ###log result
        # Log event if we had to wait, or not, for the network to be free.
        lg.logInfo("AUDIT2", "grabnetwork t|%10.3f| auditid|%s| cli|%s| "
            "coll|%s| seg|%s| delay|%9.3f|" 
            % (G.env.now, self.ID, self.sClientID, self.sCollectionID,
            mynThisSegment, fNetworkWaitTime))
        # And restart the duration clock after the unproductive wait.
        fTimeCycleBegin = G.env.now
        # So much for timekeeping.  Now do some actual work.

        # P h a s e  0: Check to see if any servers have died of old age, 
        #  possibly from being weakened by shock.  If so, they get killed
        #  now so that this audit segment will discover the loss.
        nResult = CShock.cmBeforeAudit()

        # P h a s e  1: Check servers for copies of docs, record losses.
        # Docs already permanently lost will not be put on the damaged list.
        self.dlDocsDamagedOnServers = cc.defaultdict(list)
        cCollection = G.dID2Collection[mysCollectionID]
        # foreach server used for this collection
        for sServerID in cCollection.lServerIDs:
            cServer = G.dID2Server[sServerID]
            ###foreach doc
            # foreach doc in this segment
            for sDocID in self.lDocsThisSegment:
                cDoc = G.dID2Document[sDocID]
                # If the doc is still on the server, retrieve it
                #  and spend time doing that.
                # If not, then record that doc damaged on this server.
                fTransferTime = self.mRetrieveDoc(sDocID,sServerID)
                ###if okay
                if fTransferTime:
                    NTRC.tracef(3, "AUD2", "proc AuditSegment3 retrieve "
                        "t|%10.3f| doc|%s| svr|%s| xfrtim|%f|" 
                        % (G.env.now, sDocID, sServerID, fTransferTime))
                    ###yield timeout
                    yield G.env.timeout(fTransferTime)
                else:
                    if self.mIsDocumentLost(sDocID):
                        pass    # Do not complain if doc already known to be lost.
                    else:
                        # If copy is missing here, save server in
                        #  lost-list for doc.
                        self.dlDocsDamagedOnServers[sDocID].append(sServerID)
                        NTRC.tracef(5, "AUD2", "proc AuditSegment2 doc|%s| "
                            "svr|%s| lost on|%s|" 
                            % (sDocID, sServerID,
                            self.dlDocsDamagedOnServers[sDocID]))
                        ###log copy missing on some server
                        lg.logInfo("AUDIT2", "copymissing t|%10.3f| "
                            "doc|%s| svr|%s| aud|%s-c%s-s%s| cli|%s| "
                            "coll|%s|" 
                            % (G.env.now, sDocID, sServerID, self.ID,
                            self.nNumberOfCycles, mynThisSegment,
                            self.sClientID, self.sCollectionID))
            # end foreach doc
        # end foreach server used for collection

        '''NOTE: Phase 2 here can be factored out of this function 
            entirely because it does not yield or otherwise molest 
            the clock.  But refactoring must be done carefully 
            because it consumes and supplies data from 
            phases 1 and 3.  
        '''
        # P h a s e  2: Record severity (majority/minority/permanent)
        #  of copy losses.
        # NOTE: This arithmetic seems to be reasonable for all numbers 
        #  greater than two, but one copy remaining out of two is judged 
        #  to be a majority, so a repair from that single remaining copy 
        #  is labeled a majority repair.  Seems kinda wrong.  
        # Would love to split the logic of this routine into separate 
        #  functions; when you're indented seven levels, your logic is, 
        #  um, hard to explain.  But we cannot yield from sub-functions, 
        #  at least not in Python2.  
        nServers = len(cCollection.lServerIDs)
        # NOTE(review): "/" assumes Python 2 integer division (the comment
        #  below says so); under Python 3 nMajority would be a float.
        nMajority = (len(cCollection.lServerIDs)+1) / 2 # recall that
                                                        #  int div truncates
        ###foreach doc on damaged list
        for sDocID in sorted(self.dlDocsDamagedOnServers.keys(),
                key=util.fniNumberFromID):
            ###count docs on all servers
            lDocLostOnServers = self.dlDocsDamagedOnServers[sDocID]
            nCopiesLost = len(lDocLostOnServers)
            nCopiesLeft = nServers - nCopiesLost
            # How many copies left: none, a lot, a few?
            NTRC.tracef(3, "AUD2", "proc AuditSegment1 doc|%s| nsvr|%s| "
                "loston|%s| nleft|%s|" 
                % (sDocID, nServers, lDocLostOnServers, nCopiesLeft))
            ###if doc not lost
            ### assess majority/minority/lost
            if nCopiesLeft == 0:
                # N O N E  remain
                # Report permanent loss, one ping only.
                # Do not double-count docs already lost.  Doc will not
                #  be put onto damaged list if already lost.
                sRepair = "permloss"
                lg.logInfo("AUDIT2", "perm loss t|%10.3f| doc|%s| "
                    "aud|%s-c%s-s%s| cli|%s| coll|%s|" 
                    % (G.env.now, sDocID, self.ID, self.nNumberOfCycles,
                    mynThisSegment, self.sClientID, self.sCollectionID))
                self.mRecordDocumentLost(sDocID)
            else:
                ###doc is repairable; determine majority/minority
                if nCopiesLeft >= nMajority:
                    # M A J O R I T Y  remain
                    sRepair = "majority"
                else:
                    # M I N O R I T Y  remain
                    sRepair = "minority"
                ###log repair type for doc
                lg.logInfo("AUDIT2", "%s rp t|%10.3f| doc|%s| "
                    "aud|%s-c%s-s%s| cli|%s| coll|%s|" 
                    % (sRepair, G.env.now, sDocID, self.ID,
                    self.nNumberOfCycles, mynThisSegment, self.sClientID,
                    self.sCollectionID))

            # P h a s e  3: repair damaged docs, if possible.
            ###foreach server on which doc was damaged
            # Put a copy back on each server where it is missing.
            for sServerID in lDocLostOnServers:
                if nCopiesLeft > 0:
                    ###repair
                    fTransferTime = self.mRepairDoc(sDocID,sServerID)
                    '''\
                    If the repair returns False instead of a time, 
                     then that server is no longer accepting documents.  
                    Remove that server from the list, invalidate all its 
                     copies.  
                    Then tell the client to find a new server and re-place
                     the entire collection.  
                    Schedule this notification to occur at the end of the
                     audit cycle or segment to avoid confusing the 
                     ongoing evaluation.  
                    Auditor informs client: oops, you seem to be missing 
                     a server; and client takes corrective action at that
                     time.  
                    Send collectionID and serverID to clientID.  
                    '''
                    ###if not okay ie server dead
                    # NOTE(review): "== False" also matches a 0.0 transfer
                    #  time (0.0 == False is True in Python) -- confirm
                    #  mRepairDoc can never legitimately return 0.0.
                    if fTransferTime == False:
                        self.stDeadServerIDs.add((sServerID,
                            self.sCollectionID))
                        lg.logInfo("AUDIT2", "dead server t|%10.3f| "
                            "doc|%s| aud|%s| cli|%s| coll|%s| svr|%s|" 
                            % (G.env.now, sDocID, self.ID, self.sClientID,
                            self.sCollectionID, sServerID))
                    else:
                        ###log repair effected
                        NTRC.tracef(3, "AUD2", "proc AuditSegment4 repair "
                            "t|%10.3f| doc|%s| svr|%s| xfrtim|%f| type|%s|" 
                            % (G.env.now, sDocID, sServerID, fTransferTime,
                            sRepair))
                        yield G.env.timeout(float(fTransferTime))
                        lg.logInfo("AUDIT2", "repair doc t|%10.3f| "
                            "doc|%s| aud|%s| cli|%s| coll|%s| svr|%s| "
                            "from %s copies|%d|" 
                            % (G.env.now, sDocID, self.ID, self.sClientID,
                            self.sCollectionID, sServerID, sRepair,
                            nCopiesLeft))
                        ###count repair as type maj/min for audit and doc
                        # If repair succeeded, record and count it.
                        if sRepair == "majority":
                            self.mRecordDocumentMajorityRepair(sDocID)
                        else:
                            self.mRecordDocumentMinorityRepair(sDocID)
            # end foreach server that lost this doc
        # end foreach damaged doc

        lg.logInfo("AUDIT2", "end segmt   t|%10.3f| auditid|%s| "
            "cycle|%s| seg|%s| cli|%s| coll|%s| ndocs|%s|" 
            % (G.env.now, self.ID, self.nNumberOfCycles, mynThisSegment,
            self.sClientID, self.sCollectionID, len(mylDocs)))
        # After all that, tell the caller we finished.
        myeCallerSyncEvent.succeed(value=mynThisSegment)
        lg.logInfo("AUDIT2", "rls network t|%10.3f| auditid|%s| "
            "cli|%s| coll|%s| seg|%s|" 
            % (G.env.now, self.ID, self.sClientID, self.sCollectionID,
            mynThisSegment))
    # end network resource

    # If we saw any dead servers during this segment, inform the clients.
    for (sDeadServerID, sDeadCollectionID) in self.stDeadServerIDs:
        cCollection = G.dID2Collection[self.sCollectionID]
        cClient = G.dID2Client[cCollection.sClientID]
        # NOTE(review): this trace logs sServerID and sDocID, which are
        #  stale loop variables from the phases above (and unbound if those
        #  loops never ran) -- probably meant sDeadServerID; confirm.
        NTRC.ntracef(3, "AUD2", "proc t|%10.3f| inform dead server "
            "auditid|%s| cli|%s| coll|%s| svr|%s| doc|%s|" 
            % (G.env.now, self.ID, self.sClientID, self.sCollectionID,
            sServerID, sDocID))
        cClient.mServerIsDead(sDeadServerID, sDeadCollectionID)
    self.stDeadServerIDs = set()
def main(mysInstructionsFileName,mysLogFileName):
    '''
    Extract variable values from a log file and print a filled-in
    template line (optionally preceded by a one-line header).

    NOTE(review): this is the Python 2 variant of this routine
    (print statements, tuple-parameter lambda); a Python 3 port
    appears elsewhere in this file.  Confirm which one is live.

    Args:
        mysInstructionsFileName: instructions file defining the output
            template and the per-variable line/value regexes.
        mysLogFileName: log file to scan.
    '''
    # Parse the instructions: returns the output template lines and a
    # dict of variable specs, stored into the global object.
    (lTemplate,g.dVars) = fnldParseInput(mysInstructionsFileName)
    lLines = list()     # NOTE(review): appears unused below.
    with open(mysLogFileName,"r") as fhLogFile:
        '''\
        get list of tuples: lines that match some lineregex, for which var
        foreach line, if matches any lineregex
            extract value, put varname and value in dictionary
        be careful never to form a list of lines of the input log file,
        or of anything that is big-O of that.  filter first.
        '''
        # Form list of all lines that match some var.
        nLineNr = 0
        lLinesSelectedRaw = list()
        for sLine in fhLogFile:
            nLineNr += 1            # Need line nr only for debugging.
            for sVarname in g.dVars.keys():
                tResult = fntDoesLineMatchThisVar(sLine, nLineNr, sVarname)
                # If line matches any var, save the line and the varname.
                if tResult[0]:
                    lLinesSelectedRaw.append(tResult)
        NTRC.tracef(3,"MN2","proc lLinesSelectedRaw len|%s| all|%s|"
            % (len(lLinesSelectedRaw),lLinesSelectedRaw))

        # Eliminate duplicates.  Should not be any if the lineregexes are
        # specific enough.
        lLinesSelected = list(set(lLinesSelectedRaw))
        NTRC.tracef(5,"MN3","proc lLinesSelected len|%s| all|%s|"
            % (len(lLinesSelected),lLinesSelected))

        # Extract variable value from each matching line.
        # List of lines selected is actually a list of triples
        # (matchobj, line, varname).
        # NOTE(review): tuple-parameter lambda is Python-2-only syntax
        # (removed by PEP 3113); this will not compile under Python 3.
        lResults = map( lambda (omatch, sLine, sVarname):
            fntMatchValue(sLine, g.dVars[sVarname])
            , lLinesSelected )
        # Returned list of (name,val) tuples for vars in lines selected.
        # Make a dictionary.
        dValues = dict(lResults)

        # In case we did not find the line for a variable,
        # dummy up a value.
        for sKey in g.dVars:
            dValues.setdefault(sKey,"nolinefound")

        # And in case we didn't even find a rule for some variable that
        # will be used in the template, dummy up a value for it, too.
        # Strip the substitution braces from the template to get the
        # bare list of variable names it uses.
        sTemplateHeader = "\n".join(lTemplate).replace("{","").replace("}","").replace("\n"," ")
        lTemplateVars = sTemplateHeader.split()
        for sTemplateVar in lTemplateVars:
            dValues.setdefault(sTemplateVar,"norulefound")

        # Add the synthetic variables to the value dictionary.
        dSyntho = fndGetSyntheticVars()
        dValues.update(dSyntho)

        # Make the seed value, at least, print constant width
        # for legibility.
        sSeed = dValues["seed"]
        sSeednew = "%09d" % (int(sSeed))
        dValues["seed"] = sSeednew

        # Fill in the template with values and print.
        # Template is allowed to be multiple lines.
        sTemplate = "\n".join(lTemplate)
        sLineout = makeCmd(sTemplate,dValues)
        if g.bHeader or os.environ.get("header",None):
            # Header is a single line concatenation of all the
            # substitutions in the template.
            # If the template is longer than one line, well, you can't read
            # the data with a simple header anyway.  Oops.
            sHeader = sTemplateHeader
            print sHeader
        # Newline already pasted on the end of template; don't add another.
        print sLineout,
def mfCalcCurrentSectorLifetime(self, myfNow):
    '''
    Return the sector lifetime in effect at time myfNow, reduced if a
    glitch is currently active.  Also updates self.fCurrentLifetime,
    and retires the glitch when it has expired or decayed to noise.

    if glitch in progress
        if glitch is too old
            turn it off
            log expired
            normal lifetime
        else
            calc reduced lifetime
            if decay below ignore limit
                turn it off
                log below limit

    Args:
        myfNow: current simulation time (hours).
    Returns:
        float: the current (possibly glitch-reduced) sector lifetime.
    '''
    if self.bGlitchActive:
        fTimeDiff = myfNow - self.fGlitchBegin
        fDuration = (float(self.nGlitchMaxlife))
        # If the glitch lifetime has expired, turn it off.
        if fTimeDiff > fDuration:
            NTRC.tracef(
                3, "LIFE", "proc glitch lifetime expired "
                "id|%s| num|%s| start|%.3f| now|%.3f| maxlife|%s|"
                % (self.ID, self.nGlitches, self.fGlitchBegin, myfNow,
                self.nGlitchMaxlife))
            lg.logInfo(
                "LIFETIME", "expired t|%6.0f| shelf|%s| "
                "id|%s| num|%s| start|%.3f| now|%.3f| maxlife|%s|"
                % (myfNow, self.sShelfID, self.ID, self.nGlitches,
                self.fGlitchBegin, myfNow, self.nGlitchMaxlife))
            self.bGlitchActive = False
            self.fGlitchTime += fTimeDiff
            self.fCurrentLifetime = self.fOriginalLifetime
        else:
            # The glitch is still current.
            # Carefully calculate the new sector lifetime based on
            # some reduction due to glitch and the age of the glitch:
            # exponential decay of the initial reduction percentage,
            # measured in glitch half-lives.
            fAgeInHalflives = fTimeDiff / self.nGlitchDecayHalflife
            fExponentialDecay = exp(-self.fLn2 * fAgeInHalflives)
            fReductionFraction = 1.0 * self.nReductionPercentage / 100.0
            self.fCurrentLifetime = (
                1.0 * self.fOriginalLifetime *
                (1.0 - fReductionFraction * fExponentialDecay))
            NTRC.tracef(
                3, "LIFE", "proc calcsectorlife num|%s| "
                "started|%.3f| age|%.3f| decay|%.3f| reduct|%.3f| "
                "currlife|%.3f|"
                % (self.nGlitches, self.fGlitchBegin, fAgeInHalflives,
                fExponentialDecay, fReductionFraction,
                self.fCurrentLifetime))
            # If the glitch has diminished to a low level, turn it off.
            if fExponentialDecay < G.fGlitchIgnoreLimit:
                self.bGlitchActive = False
                self.fGlitchTime += fTimeDiff
                NTRC.tracef(
                    3, "LIFE", "proc glitch turned off lifeid|%s| "
                    "num|%s| started|%.3f| age|%.3f| decay|%.3f|"
                    % (self.ID, self.nGlitches, self.fGlitchBegin,
                    fAgeInHalflives, fExponentialDecay))
    else:
        # No current glitch active.  Lifetime is as usual.
        # BUGFIX: "Lifetime is as usual." was a bare prose line (the
        # comment marker was lost), which is a SyntaxError.
        self.fCurrentLifetime = self.fOriginalLifetime
    return self.fCurrentLifetime
def mAge_sector(self):
    '''
    A sector in the shelf fails.  This corrupts a document.
    For the moment, assume that it destroys the document.
    Eventually, it will have a probability of destroying the
    document depending on the portion of the document
    corrupted and the sensitivity of the document to corruption
    (e.g., compressed or encrypted), or the failure hits an
    encryption or license key.

    SimPy-style generator process: loops forever (while the shelf is
    alive), yielding a timeout until the next sector error.
    '''
    # If the shelf has been emptied by a shelf failure, stop
    # caring about sector failures.
    while self.bAlive:
        # Sector lifetime depends on shelf lifetime and glitch age.
        # Draw the next error interval from an exponential with the
        # current (possibly glitch-reduced) lifetime as its mean.
        fNow = G.env.now
        cLifetime = G.dID2Lifetime[self.sSectorLifetimeID]
        fLifetimeNow = cLifetime.mfCalcCurrentSectorLifetime(fNow)
        fSectorLifeInterval = util.makeexpo(fLifetimeNow)
        NTRC.tracef(
            3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
            "next interval|%.3f|hr from life rate|%.3f|hr"
            % (G.env.now, self.ID, fSectorLifeInterval, fLifetimeNow))
        # Sleep until the next sector error occurs.
        yield G.env.timeout(fSectorLifeInterval)

        # S E C T O R  E R R O R
        self.nSectorHits += 1
        G.nTimeLastEvent = G.env.now
        NTRC.tracef(
            3, "SHLF", "proc mAge_sector time|%d| shelf|%s| "
            "Sector_error hits|%d| emptyhits|%d|"
            % (G.env.now, self.ID, self.nSectorHits,
            self.nEmptySectorHits))

        # Select a victim Document, probability proportional to size.
        # Small error, size=1.  What doc dies as a result?
        # Returns None when the error fell into empty space.
        sCopyVictimID = self.mSelectVictimCopy(mynErrorSize=1)

        # New version: compress strings of consecutive misses into
        # single line.
        # Normally we log one line per error regardless of whether it
        # hits or misses a document.  That results in hideously long
        # log files for sparse storage structures, like small docs on
        # large shelf.
        # Count consecutive misses, and issue one summary line before
        # the next hit.
        # CANDIDATE FOR REFACTORING
        if sCopyVictimID:
            # Hidden error in victim doc.
            # Destroy copy on this shelf.
            cCopy = G.dID2Copy[sCopyVictimID]
            sDocID = cCopy.mGetDocID()
            self.mDestroyCopy(sCopyVictimID)
            # Log the summary line if we just ended a string of misses
            if self.nConsecutiveMisses > 0:
                lg.logInfo(
                    "SERVER", "small error t|%6.0f| svr|%s| "
                    "shelf|%s| consecutive misses|%d|"
                    % (G.env.now, self.sServerID, self.ID,
                    self.nConsecutiveMisses))
                self.nConsecutiveMisses = 0
            lg.logInfo(
                "SERVER", "small error t|%6.0f| svr|%s| "
                "shelf|%s| hidden failure in copy|%s| doc|%s|"
                % (G.env.now, self.sServerID, self.ID, sCopyVictimID,
                sDocID))
            NTRC.tracef(
                3, "FAIL", "proc t|%d| sector failure server|%s| "
                "qual|%d| shelf|%s| doc|%s| copy|%s|"
                % (G.env.now, self.sServerID,
                G.dID2Server[self.sServerID].nQual, self.ID, sDocID,
                sCopyVictimID))
        else:
            # No victim, hit empty space.
            self.nEmptySectorHits += 1
            NTRC.tracef(
                3, "SHLF", "proc mAge_sector shelf|%s| "
                "sector error fell in empty space"
                % (self.ID))
            # First miss of a run gets a log line; later misses are
            # summarized when the run ends.
            # NOTE(review): this log line says "hidden failure in
            # copy|%s|" with sCopyVictimID == None even though this is
            # the miss branch — looks like a copy-paste of the hit-branch
            # message; confirm intended wording.
            if self.nConsecutiveMisses == 0:
                lg.logInfo(
                    "SERVER", "small error t|%6.0f| svr|%s| "
                    "shelf|%s| hidden failure in copy|%s|"
                    % (G.env.now, self.sServerID, self.ID,
                    sCopyVictimID))
            self.nConsecutiveMisses += 1
            NTRC.tracef(
                3, "FAIL", "proc t|%d| sector failure server|%s| "
                "qual|%d| shelf|%s| copy|%s|"
                % (G.env.now, self.sServerID,
                G.dID2Server[self.sServerID].nQual, self.ID,
                sCopyVictimID))

        # Initiate a repair of the dead document.
        # BZZZT NYI: currently all such failures are silent, so they are
        # not detected by the client until audited (or end of run).

    # Shelf is no longer alive, so we do not notice or schedule
    # future sector errors.  Log the event.
    lg.logInfo(
        "SHELF ", "t|%6.0f| dead shelf|%s| of svr|%s|, "
        "no future errors"
        % (G.env.now, self.ID, self.sServerID))
def mSelectVictimCopy(self, mynErrorSize):
    '''
    Which doc copy on this shelf, if any, was hit by this error?
    Throw a uniform dart at all the docs on the shelf, see
    which one gets hit, or dart falls into empty space.  Doc
    size counts.

    Returns the victim copy ID, or None if the error fell into
    empty space (or into the slot of an already-destroyed copy).

    NOTE(review): the arithmetic below relies on "/" being integer
    division (Python 2 semantics); under Python 3 nLoc/nDist would
    become floats and break list indexing.  Confirm interpreter
    version before porting.
    '''
    # Uniform position over capacity, widened by the error size.
    nRandomSpot = util.makeunif(1, self.nCapacity + mynErrorSize - 1)
    nLoc = 0
    NTRC.tracef(
        5, "SHLF", "proc SelectVictimCopy0 wherehit spot|%s| "
        "hiwater|%s| shelfid|%s| capacity|%s|"
        % (nRandomSpot, self.nHiWater, self.ID, self.nCapacity))
    # First, check to see if the failure is maybe in an occupied region.
    if nRandomSpot <= self.nHiWater:
        # Find the document hit by the error.  May have been hit
        # before, too.
        # New version, vanilla binary search with adjacent interval
        # checking on list of all locations assigned on this shelf.
        # After you find the location, check to see that it
        # is still occupied by live copy.
        # lCopyTops[i] is the top block of slot i; slot i covers
        # (lCopyTops[i-1], lCopyTops[i]].
        nLen = len(self.lCopyIDsComplete)
        nDist = (nLen + 1) / 2      # integer halving (Python 2 "/")
        nLoc = nDist                # start probe at the midpoint
        NTRC.tracef(
            5, "SHLF", "proc SelectVictimCopy0 searchsetup len|%s| "
            "loc|%s| dist|%s|"
            % (nLen, nLoc, nDist))
        while 1:
            # Clamp the probe inside the valid slot range.
            if nLoc <= 0:
                nLoc = 1
            if nLoc >= nLen:
                nLoc = nLen - 1
            # Halve the step, but never let it reach zero, so the
            # search always keeps moving.
            nDist = (nDist + 1) / 2
            if nDist == 0:
                nDist = 1
            # Interval and occupant of the current probe slot.
            nTop = self.lCopyTops[nLoc]
            nBottom = self.lCopyTops[nLoc - 1]
            sCopyID = self.lCopyIDsComplete[nLoc - 1]
            sDocID = self.lDocIDsComplete[nLoc - 1]
            cCopy = G.dID2Copy[sCopyID]
            if nRandomSpot <= nTop:
                # Lower than top, look down.
                if nRandomSpot >= nBottom:
                    # Found to left of nLoc.
                    NTRC.tracef(
                        5, "SHLF", "proc SelectVictimCopy5D "
                        "found victim id|%s| at spot|%s| in[%s,%s]| "
                        "doc|%s|"
                        % (sCopyID, nRandomSpot, nBottom, nTop,
                        sDocID))
                    # Is this slot still occupied by a live copy?
                    if sCopyID in self.lCopyIDs:
                        sVictimID = sCopyID
                        NTRC.tracef(
                            3, "SHLF", "proc mSelectVictimCopy "
                            "NEWD end shelf|%s| spot|%d| hits doc|%s| "
                            "placed[%d,%d] size|%d| outof|%d|"
                            % (self.ID, nRandomSpot, sVictimID,
                            cCopy.nBlkBegin, cCopy.nBlkEnd,
                            (cCopy.nBlkEnd - cCopy.nBlkBegin + 1),
                            self.nCapacity))
                    else:
                        # Copy already destroyed earlier; count the
                        # repeat hit but report no victim.
                        sVictimID = None
                        NTRC.tracef(
                            5, "SHLF", "proc SelectVictimCopy2D "
                            "no longer valid copyid|%s| docid|%s|"
                            % (sCopyID, sDocID))
                        self.nMultipleHits += 1
                    break
                else:
                    # Below this slot's interval: step left.
                    nLoc -= nDist
                    NTRC.tracef(
                        5, "SHLF", "proc SelectVictimCopy3D "
                        "down spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|"
                        % (nRandomSpot, nBottom, nTop, nLoc, nDist))
            else:
                # Higher than top, look up.
                if nRandomSpot <= self.lCopyTops[nLoc + 1]:
                    # Found to right of nLoc.
                    # Reevaluate ids and locations to the next slot
                    # on the right.
                    sCopyID = self.lCopyIDsComplete[nLoc + 1 - 1]
                    sDocID = self.lDocIDsComplete[nLoc + 1 - 1]
                    cCopy = G.dID2Copy[sCopyID]
                    nBottom = self.lCopyTops[nLoc + 1 - 1]
                    # NOTE(review): duplicate assignment of sCopyID
                    # below is redundant but harmless.
                    sCopyID = self.lCopyIDsComplete[nLoc + 1 - 1]
                    NTRC.tracef(
                        5, "SHLF", "proc SelectVictimCopy5U "
                        "found victim id|%s| at spot|%s| in[%s,%s]| doc|%s|"
                        % (sCopyID, nRandomSpot, nBottom, nTop,
                        sDocID))
                    # Is this slot still occupied by a live copy?
                    if sCopyID in self.lCopyIDs:
                        sVictimID = sCopyID
                        NTRC.tracef(
                            3, "SHLF", "proc mSelectVictimCopy NEWU "
                            "end shelf|%s| spot|%d| hits doc|%s| "
                            "placed[%d,%d] size|%d| outof|%d|"
                            % (self.ID, nRandomSpot, sVictimID,
                            cCopy.nBlkBegin, cCopy.nBlkEnd,
                            (cCopy.nBlkEnd - cCopy.nBlkBegin + 1),
                            self.nCapacity))
                    else:
                        # Copy already destroyed earlier; count the
                        # repeat hit but report no victim.
                        sVictimID = None
                        NTRC.tracef(
                            5, "SHLF", "proc SelectVictimCopy2U "
                            "no longer valid copyid|%s| docid|%s|"
                            % (sCopyID, sDocID))
                        self.nMultipleHits += 1
                    break
                else:
                    # Above the right neighbor too: step right.
                    nLoc += nDist
                    NTRC.tracef(
                        5, "SHLF", "proc SelectVictimCopy3U up "
                        "spot|%s| intvl|[%s,%s| newloc|%s| newdist|%s|"
                        % (nRandomSpot, nBottom, nTop, nLoc, nDist))
    else:
        # Outside hiwater area, just count as a miss.
        NTRC.tracef(
            3, "SHLF", "proc mSelectVictimCopy shelf|%s| spot|%d| "
            "above hiwater|%s| empty"
            % (self.ID, nRandomSpot, self.nHiWater))
        sVictimID = None
        self.nHitsAboveHiWater += 1
    return sVictimID
def main(mysInstructionsFileName, mysLogFileName):
    '''
    Scan a log file for lines matching per-variable regexes from the
    instructions file, extract one value per variable, and print the
    filled-in template line (optionally preceded by a header line).

    Args:
        mysInstructionsFileName: instructions file defining the output
            template and the per-variable line/value regexes.
        mysLogFileName: log file to scan.

    Output is printed to stdout; nothing is returned.
    '''
    (lTemplate, g.dVars) = fnldParseInput(mysInstructionsFileName)
    # BUGFIX: encoding was "'utf-8" (stray apostrophe), which raises
    # LookupError: unknown encoding as soon as the file is opened.
    with open(mysLogFileName, "r", encoding="utf-8") as fhLogFile:
        '''\
        get list of tuples: lines that match some lineregex, for which var
        foreach line, if matches any lineregex
            extract value, put varname and value in dictionary
        be careful never to form a list of lines of the input log file,
        or of anything that is big-O of that.  filter first.
        '''
        # Form list of all lines that match some var.
        nLineNr = 0
        lLinesSelectedRaw = list()
        for sLine in fhLogFile:
            nLineNr += 1            # Need line nr only for debugging.
            for sVarname in g.dVars.keys():
                tResult = fntDoesLineMatchThisVar(sLine, nLineNr, sVarname)
                # If line matches any var, save the line and the varname.
                if tResult[0]:
                    lLinesSelectedRaw.append(tResult)
        NTRC.tracef(
            3, "MN2", "proc lLinesSelectedRaw len|%s| all|%s|"
            % (len(lLinesSelectedRaw), lLinesSelectedRaw))

        # Eliminate duplicates.  Should not be any if the lineregexes are
        # specific enough.
        lLinesSelected = list(set(lLinesSelectedRaw))
        NTRC.tracef(
            5, "MN3", "proc lLinesSelected len|%s| all|%s|"
            % (len(lLinesSelected), lLinesSelected))

        # Extract variable value from each matching line.
        # List of lines selected is actually a list of
        # (matchobj, line, varname) triples.  Python 3 removed tuple
        # parameters for lambdas (PEP 3113), so index explicitly.
        lResults = map(lambda tLine: fntMatchValue(tLine[1],
                        g.dVars[tLine[2]]),
                        lLinesSelected)
        # Returned list of (name,val) tuples for vars in lines selected.
        # Make a dictionary.
        dValues = dict(lResults)

        # In case we did not find the line for a variable,
        # dummy up a value.
        for sKey in g.dVars:
            dValues.setdefault(sKey, "nolinefound")

        # And in case we didn't even find a rule for some variable that
        # will be used in the template, dummy up a value for it, too.
        # Strip the substitution braces from the template to get the
        # bare list of variable names it uses.
        sTemplateHeader = "\n".join(lTemplate).replace("{", "").replace(
            "}", "").replace("\n", " ")
        lTemplateVars = sTemplateHeader.split()
        for sTemplateVar in lTemplateVars:
            dValues.setdefault(sTemplateVar, "norulefound")

        # Add the synthetic variables to the value dictionary.
        dSyntho = fndGetSyntheticVars()
        dValues.update(dSyntho)

        # Make the seed value, at least, print constant width
        # for legibility.
        sSeed = dValues["seed"]
        sSeednew = "%09d" % (int(sSeed))
        dValues["seed"] = sSeednew

        # Fill in the template with values and print.
        # Template is allowed to be multiple lines.
        sTemplate = "\n".join(lTemplate)
        sLineout = makeCmd(sTemplate, dValues)
        if g.bHeader or os.environ.get("header", None):
            # Header is a single line concatenation of all the
            # substitutions in the template.
            # If the template is longer than one line, well, you can't read
            # the data with a simple header anyway.  Oops.
            sHeader = sTemplateHeader
            print(sHeader)
        # Newline already pasted on the end of template; don't add another.
        # BUGFIX: print(sLineout, ) still appended a newline; the Python 2
        # trailing-comma idiom must become end="" in Python 3.
        print(sLineout, end="")
def main():
    '''
    Process:
    Open the file given on the command line.
    Open the database given on the command line.
    Read the two lines from the file.
    If the sDoneId(=mongoid) already appears in the done collection
     of the database,
    Then end.
    Else dictionary-ify the data (maybe csvreader already did that for us).
         add the dict to the done collection, including the sDoneId field.
    end.

    Returns 0 (process exit status).
    '''
    NTRC.ntracef(0,"DCLN","datacleanup Begin.")
    # Get args from CLI and put them into the global data
    dCliDict = fndCliParse("")
    # Carefully insert any new CLI values into the Global object:
    # only keys the user actually supplied (value not None).
    dCliDictClean = {k:v for k,v in dCliDict.items() if v is not None}
    g.__dict__.update(dCliDictClean)

    # Get data from the extract file: one line of header, one line of data.
    with open(g.sInputFilename,'r') as fhInput:
        oReader = csv.reader(fhInput, delimiter=g.sSeparator)
        lHeader = next(oReader)
        lValues = next(oReader)
    NTRC.tracef(3, "DCLN", "proc lHeader|%s|" % (lHeader))
    NTRC.tracef(3, "DCLN", "proc lValues|%s|" % (lValues))
    # Pair header names with values to form the record dict.
    dValues = dict(zip(lHeader, lValues))
    NTRC.tracef(3, "DCLN", "proc dValues|%s|" % (dValues))

    # Open the SearchDatabase for done and progress records.
    g.mdb = searchdatabasemongo.CSearchDatabase(g.sSearchDbMongoName,
                g.sProgressCollectionName,
                g.sDoneCollectionName)
    # Construct database query for this invocation.
    sInstructionId = dValues["mongoid"]
    sLineOut = g.sSeparator.join(lValues)
    NTRC.tracef(0,"DCLN","proc looking for done recd|%s|"
        % (sInstructionId))

    # If this extract is already stored in the database,
    # don't do it again.
    bIsItDone = g.mdb.fnbIsItDone(sInstructionId)
    if not bIsItDone:
        # If case not already done, add data line to the giant
        # output file.
        # But first, ...
        # If the output file does not exist, or is empty, write the
        # header line in first before the data line.
        # (If the file does not exist, open mode=a will create
        # an empty one.)
        with open(g.sGiantOutputFilename,'a') as fhOutput:
            if not os.stat(g.sGiantOutputFilename).st_size:
                sHeaderLine = g.sSeparator.join(lHeader)
                fhOutput.write(sHeaderLine + "\n")
                NTRC.tracef(3, "DCLN", "proc wroteheaderline|%s|"
                    % (sHeaderLine))
            fhOutput.write(sLineOut + "\n")
            NTRC.tracef(0, "DCLN",
                "proc line appended to output \nsLineOut|%s|"
                % (sLineOut))

        # Probably record the done record in db.
        # NOTE(review): sDoNotRecord/sDoNotDelete appear to be "Y"/"N"
        # style flags from the CLI — confirm against fndCliParse.
        if g.sDoNotRecord.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Done not recorded.")
        else:
            # Insert result ignored beyond tracing; kept for debugging.
            dResult = g.mdb.fndInsertDoneRecord(sInstructionId, dValues)

        # Probably delete the extract file.
        if g.sDoNotDelete.startswith("Y"):
            NTRC.tracef(0, "DCLN", "proc Input file not deleted.")
        else:
            os.remove(g.sInputFilename)
            NTRC.tracef(3,"DCLN", "proc fileremoved|%s|"
                % (g.sInputFilename))
            # And remove its in-progress record from the search db.
            g.mdb.fndDeleteProgressRecord(sInstructionId)
    else:
        # Duplicate instruction; do not add line to output file.
        NTRC.tracef(0, "DCLN",
            "proc line *NOT* appended to output file \n"
            "sLineOut|%s|" % (sLineOut))

    NTRC.ntracef(0,"DCLN","datacleanup End.")
    return 0