def testFailOver(cluster, nodeToKill, addSwapFlags=None):
    """Kill a node, delete its chain data, relaunch it and check block production.

    Steps performed:
      1. Send SIGTERM to ``nodeToKill``.
      2. Remove that node's "state" and "blocks" data directories.
      3. Record the head block number currently reported by node 0.
      4. Relaunch the killed node (with genesis, i.e. ``skipGenesis=False``).
      5. Assert that ``node0.waitForIrreversibleBlockProducedBy`` succeeds for
         producer "vltproducera" relative to the block number from step 3.

    Args:
        cluster: object providing ``getNode(index)``.
        nodeToKill: node object providing ``nodeId``, ``kill`` and ``relaunch``.
        addSwapFlags: optional dict forwarded to ``nodeToKill.relaunch``.
            Defaults to a fresh empty dict on every call.

    Raises:
        AssertionError: if the relaunch fails or the expected irreversible
            block is not observed.
    """
    # Use a None sentinel so that calls never share (and possibly mutate)
    # a single default dict object.
    if addSwapFlags is None:
        addSwapFlags = {}

    node0 = cluster.getNode(0)
    nodeId = nodeToKill.nodeId

    Print("Kill node %d and then remove the state and blocks" % nodeId)
    nodeToKill.kill(signal.SIGTERM)
    shutil.rmtree(Utils.getNodeDataDir(nodeId, "state"))
    shutil.rmtree(Utils.getNodeDataDir(nodeId, "blocks"))

    # Reference point: only blocks after this one count as evidence of recovery.
    blockNumAfterNode1Killed = node0.getHeadBlockNum()

    assert nodeToKill.relaunch(timeout=30, skipGenesis=False, cachePopen=True, addSwapFlags=addSwapFlags), "Fail to relaunch"
    assert node0.waitForIrreversibleBlockProducedBy("vltproducera", blockNumAfterNode1Killed, retry=30), "failed to see blocks produced by vltproducera"
def recoverBackedupBlksDir(nodeId):
    """Replace a node's "blocks" directory with the backed-up copy.

    The backup is read from ``<parent>/<data dir name>-backup/blocks``, where
    ``<parent>`` and ``<data dir name>`` are the two components of the path
    returned by ``Utils.getNodeDataDir(nodeId)``.
    """
    nodeDataDir = Utils.getNodeDataDir(nodeId)
    parentDir, dirName = os.path.split(nodeDataDir)

    liveBlocks = os.path.join(nodeDataDir, "blocks")
    savedBlocks = os.path.join(parentDir, dirName + "-backup", "blocks")

    # Delete existing one and copy backed up one
    shutil.rmtree(liveBlocks, ignore_errors=True)
    shutil.copytree(savedBlocks, liveBlocks)
def backupBlksDir(nodeId):
    """Copy a node's "blocks" directory into its "-backup" sibling directory.

    The copy is written to ``<parent>/<data dir name>-backup/blocks``, where
    ``<parent>`` and ``<data dir name>`` are the two components of the path
    returned by ``Utils.getNodeDataDir(nodeId)``.
    """
    nodeDataDir = Utils.getNodeDataDir(nodeId)
    parentDir, dirName = os.path.split(nodeDataDir)
    shutil.copytree(
        os.path.join(nodeDataDir, "blocks"),
        os.path.join(parentDir, dirName + "-backup", "blocks"),
    )
activatedBlockNum = newNodes[0].getHeadBlockNum( ) # The PREACTIVATE_FEATURE should have been activated before or at this block num assert waitUntilBlockBecomeIrr(newNodes[0], activatedBlockNum), \ "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" assert newNodes[1].getIrreversibleBlockNum() >= activatedBlockNum and \ newNodes[2].getIrreversibleBlockNum() >= activatedBlockNum, \ "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" assert oldNode.getIrreversibleBlockNum() <= libBeforePreactivation, \ "4th node LIB should stuck on LIB before PREACTIVATE_FEATURE is activated" # Restart old node with newest version # Before we are migrating to new version, use --export-reversible-blocks as the old version # and --import-reversible-blocks with the new version to ensure the compatibility of the reversible blocks # Finally, when we restart the 4th node with the version of nodroxe that supports protocol feature, # all nodes should be in sync, and the 4th node will also contain PREACTIVATE_FEATURE portableRevBlkPath = os.path.join(Utils.getNodeDataDir(oldNodeId), "rev_blk_portable_format") oldNode.kill(signal.SIGTERM) # Note, for the following relaunch, these will fail to relaunch immediately (expected behavior of export/import), so the chainArg will not replace the old cmd oldNode.relaunch( oldNodeId, chainArg="--export-reversible-blocks {}".format(portableRevBlkPath), timeout=1) oldNode.relaunch( oldNodeId, chainArg="--import-reversible-blocks {}".format(portableRevBlkPath), timeout=1, nodroxePath="programs/nodroxe/nodroxe") os.remove(portableRevBlkPath) restartNode(oldNode,
# Poll until no node reports itself alive; give up after 12 unsuccessful polls.
count = 0
while True:
    allDone = True
    # verifyAlive() is called on every node each round (no short-circuit).
    for node in nodes:
        if node.verifyAlive():
            allDone = False
    if allDone:
        break
    count += 1
    if count > 12:
        Utils.cmdError("All Nodes should have died")
        errorExit("Failure - All Nodes should have died")
    time.sleep(5)

# Every node's stderr log must mention the database guard exception.
for i in range(numNodes):
    # Context manager closes the log file; previously the handle was leaked.
    with open(Utils.getNodeDataDir(i) + "/stderr.txt") as f:
        contents = f.read()
    if "database chain::guard_exception" not in contents:
        errorExit("Node%d is expected to exit because of database guard_exception, but was not." % (i))

Print("all nodes exited with expected reason database_guard_exception")

Print("relaunch nodes with new capacity")
addSwapFlags = {}
maxRAMValue += 2
currentMinimumMaxRAM = maxRAMValue
enabledStaleProduction = False
for i in range(numNodes):
    addSwapFlags[maxRAMFlag] = str(maxRAMValue)
activatedBlockNum = newNodes[0].getHeadBlockNum() # The PREACTIVATE_FEATURE should have been activated before or at this block num assert waitUntilBlockBecomeIrr(newNodes[0], activatedBlockNum), \ "1st node LIB should be able to advance past the block that contains PREACTIVATE_FEATURE" assert newNodes[1].getIrreversibleBlockNum() >= activatedBlockNum and \ newNodes[2].getIrreversibleBlockNum() >= activatedBlockNum, \ "2nd and 3rd node LIB should also be able to advance past the block that contains PREACTIVATE_FEATURE" assert oldNode.getIrreversibleBlockNum() <= libBeforePreactivation, \ "4th node LIB should stuck on LIB before PREACTIVATE_FEATURE is activated" # Restart old node with newest version # Before we are migrating to new version, use --export-reversible-blocks as the old version # and --import-reversible-blocks with the new version to ensure the compatibility of the reversible blocks # Finally, when we restart the 4th node with the version of vectrum-node that supports protocol feature, # all nodes should be in sync, and the 4th node will also contain PREACTIVATE_FEATURE portableRevBlkPath = os.path.join(Utils.getNodeDataDir(oldNodeId), "rev_blk_portable_format") oldNode.kill(signal.SIGTERM) # Note, for the following relaunch, these will fail to relaunch immediately (expected behavior of export/import), so the chainArg will not replace the old cmd oldNode.relaunch(oldNodeId, chainArg="--export-reversible-blocks {}".format(portableRevBlkPath), timeout=1) oldNode.relaunch(oldNodeId, chainArg="--import-reversible-blocks {}".format(portableRevBlkPath), timeout=1, nodePath="programs/node/vectrum-node") os.remove(portableRevBlkPath) restartNode(oldNode, oldNodeId, chainArg="--replay", nodePath="programs/node/vectrum-node") time.sleep(2) # Give some time to replay assert areNodesInSync(allNodes), "All nodes should be in sync" assert shouldNodeContainPreactivateFeature(oldNode), "4th node should contain PREACTIVATE_FEATURE" testSuccessful = True finally: 
TestHelper.shutdown(cluster, walletMgr, testSuccessful, killInstances, killWallet, keepLogs, killAll, dumpErrorDetails)
def removeState(nodeId):
    """Delete the "state" directory under the node's data directory.

    Errors from the removal are ignored (``ignore_errors=True``).
    """
    statePath = os.path.join(Utils.getNodeDataDir(nodeId), "state")
    shutil.rmtree(statePath, ignore_errors=True)
def removeReversibleBlks(nodeId):
    """Delete the "blocks/reversible" directory under the node's data directory.

    Errors from the removal are ignored (``ignore_errors=True``).
    """
    reversiblePath = os.path.join(Utils.getNodeDataDir(nodeId), "blocks", "reversible")
    shutil.rmtree(reversiblePath, ignore_errors=True)
def getLatestSnapshot(nodeId):
    """Return the path of the last entry (by sorted name) in the node's "snapshots" directory.

    Raises:
        AssertionError: if the snapshots directory is empty.
    """
    snapshotsRoot = os.path.join(Utils.getNodeDataDir(nodeId), "snapshots")
    entries = os.listdir(snapshotsRoot)
    assert len(entries) > 0
    # Entry names are compared as strings; the greatest one is taken as latest.
    newest = sorted(entries)[-1]
    return os.path.join(snapshotsRoot, newest)
# Verify the block log of node 0: expected block numbers, smoke test, and
# that a freshly generated index file matches the existing one.
Print("Retrieve the whole blocklog for node 0")
blockLog = cluster.getBlockLog(0)
foundBlockNums = checkBlockLog(blockLog, [headBlockNum, headBlockNumAfter])
# Report the block number that was searched for and the log that was searched.
# (Previously the message formatted the found-flag itself and referenced
# `output`, which is only assigned further down.)
# NOTE(review): assumes foundBlockNums[k] is the found-flag for the k-th
# requested block number - confirm against checkBlockLog.
assert foundBlockNums[0], "Couldn't find \"%d\" in blocklog:\n\"%s\"\n" % (headBlockNum, blockLog)
assert not foundBlockNums[1], "Should not find \"%d\" in blocklog:\n\"%s\"\n" % (headBlockNumAfter, blockLog)

output = cluster.getBlockLog(0, blockLogAction=BlockLogAction.smoke_test)
expectedStr = "no problems found"
assert output.find(expectedStr) != -1, "Couldn't find \"%s\" in:\n\"%s\"\n" % (expectedStr, output)

blockLogDir = Utils.getNodeDataDir(0, "blocks")
duplicateIndexFileName = os.path.join(blockLogDir, "duplicate.index")
output = cluster.getBlockLog(0, blockLogAction=BlockLogAction.make_index, outputFile=duplicateIndexFileName)
assert output is not None, "Couldn't make new index file \"%s\"\n" % (duplicateIndexFileName)

blockIndexFileName = os.path.join(blockLogDir, "blocks.index")
# Read both index files as bytes; the context managers close the handles.
with open(blockIndexFileName, "rb") as blockIndexFile:
    blockIndexStr = blockIndexFile.read()
with open(duplicateIndexFileName, "rb") as duplicateIndexFile:
    duplicateIndexStr = duplicateIndexFile.read()
# The format string now has two %s placeholders to match its two arguments;
# the old `\%%s\"` produced only one and raised TypeError on failure.
assert blockIndexStr == duplicateIndexStr, "Generated file \"%s\" didn't match original \"%s\"" % (duplicateIndexFileName, blockIndexFileName)
for status in statuses: statusDesc = status["status"] if statusDesc == "done": done = True firstBlockNum = status["first_block_num"] lastBlockNum = status["last_block_num"] maxFirstBN = max(maxFirstBN, firstBlockNum) minLastBN = min(minLastBN, lastBlockNum) if statusDesc == "error": Utils.errorExit("javascript client reporting error see: %s." % (shipClientErrorFile)) assert done, Print("ERROR: Did not find a \"done\" status for client %d" % (i)) Print("All clients active from block num: %s to block_num: %s." % (maxFirstBN, minLastBN)) stderrFile=Utils.getNodeDataDir(shipNodeNum, "stderr.txt") biggestDelta = timedelta(seconds=0) totalDelta = timedelta(seconds=0) timeCount = 0 with open(stderrFile, 'r') as f: line = f.readline() while line: match = re.search(r'info\s+([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3})\s.+Received\sblock\s+.+\s#([0-9]+)\s@\s([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3})', line) if match: rcvTimeStr = match.group(1) prodTimeStr = match.group(3) blockNum = int(match.group(2)) if blockNum > maxFirstBN: # ship requests can only affect time after clients started rcvTime = datetime.strptime(rcvTimeStr, Utils.TimeFmt)
def get_successful_constructed_block_numbers_for_node(nodeId):
    """Collect the block numbers reported by every ``stderr.*.txt`` file of a node.

    Each file matching ``stderr.*.txt`` in the node's data directory is passed
    to ``get_successful_constructed_block_numbers_in_file``; the results are
    merged into a single set.

    Returns:
        set: the union of the values returned for all matching files.
    """
    logPattern = os.path.join(Utils.getNodeDataDir(nodeId), 'stderr.*.txt')
    blockNums = set()
    for logPath in glob.glob(logPattern):
        blockNums.update(get_successful_constructed_block_numbers_in_file(logPath))
    return blockNums