def test_cost_without_checkpoint(self):
    """Check LogDataCost.compute_cost on the plain request sequence [4, 10].

    Two single-run workloads ([3] and [7]) are priced under three
    ClusterCosts configurations and compared with fixed expected values.
    """
    handler = rqs.LogDataCost([4, 10])

    # Each row: (ClusterCosts positional args,
    #            expected cost for run [3], expected cost for run [7])
    expectations = (
        ((1, 0, 0), 4, 14),
        ((1, 1, 0), 7, 25),
        ((1, 1, 1), 8, 27),
    )
    for cost_args, expected_for_3, expected_for_7 in expectations:
        cost = rqs.ClusterCosts(*cost_args)
        self.assertEqual(handler.compute_cost([3], cost), expected_for_3)
        self.assertEqual(handler.compute_cost([7], cost), expected_for_7)
def test_cost_with_checkpoint(self):
    """Check LogDataCost.compute_cost on the sequence [(4, 1), (6, 0)].

    Each sequence entry is a pair; judging by the test name the second
    element is presumably a checkpoint flag (confirm against LogDataCost).
    Two single-run workloads ([3] and [7]) are priced under three
    ClusterCosts configurations and compared with fixed expected values.
    """
    handler = rqs.LogDataCost([(4, 1), (6, 0)])

    # Each row: (ClusterCosts positional args,
    #            expected cost for run [3], expected cost for run [7])
    expectations = (
        ((1, 0, 0), 4, 10),
        ((1, 1, 0), 7, 17),
        ((1, 1, 1), 8, 19),
    )
    for cost_args, expected_for_3, expected_for_7 in expectations:
        cost = rqs.ClusterCosts(*cost_args)
        self.assertEqual(handler.compute_cost([3], cost), expected_for_3)
        self.assertEqual(handler.compute_cost([7], cost), expected_for_7)
def test_sequence_cost(self):
    """Check compute_sequence_cost with and without an explicit cost model.

    The estimator is built from 101 identical history values of 5; the
    sequence it proposes is then priced against the runs [1, 2, 3].
    """
    estimator = rqs.ResourceEstimator([5] * 101)

    # First pass: rely on the library's default cluster costs.
    default_sequence = estimator.compute_request_sequence()
    default_result = estimator.compute_sequence_cost(
        default_sequence, [1, 2, 3])
    self.assertEqual(default_result[0], 7)

    # Second pass: the same estimator with an explicit ClusterCosts(0, 1, 0),
    # used both to build the sequence and to price it.
    custom_costs = rqs.ClusterCosts(0, 1, 0)
    custom_sequence = estimator.compute_request_sequence(
        cluster_cost=custom_costs)
    custom_result = estimator.compute_sequence_cost(
        custom_sequence, [1, 2, 3], cluster_cost=custom_costs)
    self.assertEqual(custom_result[0], 2)
def test_system_models(self):
    """Check the first request computed under reservation-only costs.

    Covers the Cloud model (alpha 1 beta 0 gamma 0) on two example logs,
    once with default parameters and once with a normal-distribution
    interpolation model.
    """

    def cloud_costs():
        # A fresh cost object per call, as each request computation in this
        # test receives its own ClusterCosts instance.
        return rqs.ClusterCosts(
            reservation_cost=1, utilization_cost=0, deploy_cost=0)

    # Default parameters on the truncated-normal log.
    history = np.loadtxt("examples/logs/truncnorm.in", delimiter=' ')
    estimator = rqs.ResourceEstimator(history)
    sequence = estimator.compute_request_sequence(cluster_cost=cloud_costs())
    first_request = sequence[0][0]
    self.assertTrue(abs(first_request - 10.8) < 0.1)

    # Same log, fitted through a distribution-based interpolation model.
    params = rqs.ResourceParameters()
    params.interpolation_model = [
        rqs.DistInterpolation(list_of_distr=[norm], discretization=100)
    ]
    estimator = rqs.ResourceEstimator(history, params=params)
    sequence = estimator.compute_request_sequence(cluster_cost=cloud_costs())
    first_request = sequence[0][0]
    self.assertTrue(abs(first_request - 10.8) < 0.1)

    # Default parameters on the CT eye segmentation log. The division by
    # 3600 presumably converts seconds to hours - confirm against the log.
    history = np.loadtxt("examples/logs/CT_eye_segmentation.log",
                         delimiter=' ')
    estimator = rqs.ResourceEstimator(history)
    sequence = estimator.compute_request_sequence(cluster_cost=cloud_costs())
    first_request = sequence[0][0]
    self.assertTrue(abs(first_request / 3600 - 22.4) < 0.1)
def default_sequence(training, history, interpolation, discretization, stype):
    """Return the cost of the sequence learned from `training` on `history`.

    The largest value of `history` is appended to the training data before
    the estimator is built. Both the request sequence and its cost are
    computed with ClusterCosts(1, 1, 0).

    Args:
        training: values used to fit the estimator.
        history: runs the resulting sequence is priced against.
        interpolation: value stored (wrapped in a 1-tuple) as the
            interpolation model.
        discretization: value stored (wrapped in a 1-tuple) as the
            resource discretization.
        stype: value assigned to the parameters' CR_strategy.

    Returns:
        Whatever compute_sequence_cost returns for the computed sequence.
    """
    cluster_cost = rqs.ClusterCosts(1, 1, 0)
    extended_training = np.append(training, max(history))

    estimator_params = rqs.ResourceParameters()
    # NOTE(review): the original assignments end with a trailing comma, so
    # both attributes receive 1-tuples. That is kept here unchanged, but it
    # looks accidental - confirm what ResourceParameters expects.
    estimator_params.interpolation_model = (interpolation,)
    estimator_params.resource_discretization = (discretization,)
    estimator_params.CR_strategy = stype

    estimator = rqs.ResourceEstimator(extended_training,
                                      params=estimator_params)
    request_sequence = estimator.compute_request_sequence(
        cluster_cost=cluster_cost)
    return estimator.compute_sequence_cost(
        request_sequence, history, cluster_cost=cluster_cost)
if __name__ == '__main__':
    # Script entry point: compare the request sequence learned from the
    # first `training_size` log entries with the one learned from the whole
    # log, and report how far the former is from the latter.
    if len(sys.argv) < 3:
        print("Usage: %s log_file training_size" % (sys.argv[0]))
        # sys.exit instead of the interactive-only `exit` helper, with a
        # non-zero status so callers can detect the usage error.
        sys.exit(1)

    file_name = sys.argv[1]
    train_size = int(sys.argv[2])
    data = np.loadtxt(file_name, delimiter=' ')
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if len(data) <= train_size:
        sys.exit("Training size exceeds the total dataset")
    # The maximum of the full dataset is always added to the training data.
    training = list(data[:train_size]) + [max(data)]

    # compute the requests based on the entire data
    wl = rqs.ResourceEstimator(data)
    cl = rqs.ClusterCosts(1, 0, 0)
    sequence = wl.compute_request_sequence(cluster_cost=cl)
    # cost value will be used as reference as optimal
    cost_opt = wl.compute_sequence_cost(sequence, data, cluster_cost=cl)
    print("Request sequence based on the entire dataset: %s Cost %.2f" % (sequence, cost_opt))

    # compute the requests based on the training data
    wl = rqs.ResourceEstimator(training)
    sequence = wl.compute_request_sequence(cluster_cost=cl)
    # The training-based sequence is priced against the full dataset.
    cost = wl.compute_sequence_cost(sequence, data, cluster_cost=cl)
    print("Request sequence based on training: %s\n"\
          "Sequence cost: %.2f (within %.2f%% of optimal)" % (
              sequence, cost, (cost-cost_opt)*100/cost_opt))
import sys
sys.path.append("..")
import iSBatch as rqs
import numpy as np

if __name__ == '__main__':
    # Script entry point: print the request sequences computed for a
    # walltime log under a static and a dynamic checkpoint memory model.
    if len(sys.argv) < 3:
        print("Usage: %s walltime_log_file memory_file" % (sys.argv[0]))
        # sys.exit instead of the interactive-only `exit` helper, with a
        # non-zero status so callers can detect the usage error.
        sys.exit(1)

    file_name = sys.argv[1]
    history = np.loadtxt(file_name, delimiter=' ')
    memory_footprint = np.loadtxt(sys.argv[2], delimiter=',')

    # set the cluster cost model
    params = rqs.ResourceParameters()
    params.CR_strategy = rqs.CRStrategy.AlwaysCheckpoint
    # The static model uses the largest footprint value for both costs.
    max_footprint = np.max(memory_footprint)
    check_model = rqs.StaticCheckpointMemoryModel(
        checkpoint_cost=max_footprint,
        restart_cost=max_footprint)
    cl_cost = rqs.ClusterCosts(checkpoint_memory_model=check_model)

    wl = rqs.ResourceEstimator(history, params=params)
    sequence = wl.compute_request_sequence(cluster_cost=cl_cost)
    print("Request sequence (static checkpoint): %s" % (sequence))

    # Same estimator, now with the dynamic model built from the whole
    # memory footprint array.
    check_model = rqs.DynamicCheckpointMemoryModel(memory_footprint)
    cl_cost = rqs.ClusterCosts(checkpoint_memory_model=check_model)
    sequence = wl.compute_request_sequence(cluster_cost=cl_cost)
    print("Request sequence (dynamic checkpoint): %s" % (sequence))