"number_of_processes": 2, "working_directory": os.getcwd() + "/work/", } pilot = pilot_compute_service.create_pilot(pilot_compute_description=pilot_compute_description) return pilot PERFORMANCE_DATA_FILE="DIDU-kmeans-results-" FIELDS=["NumberPoints", "Pilot", "KMeansImpl", "RunTimestamp", "Type"] RESULT_DIR="results" NUM_ITERATIONS=2 ################################################################################################### if __name__ == '__main__': DistributedInMemoryDataUnit.flushdb() run_timestamp=datetime.datetime.now() time_measures={} ############################################################################# start = time.time() pilot_compute_description = { "service_url": 'fork://localhost', "number_of_processes": 2, "working_directory": os.getcwd() + "/work/", } pilot=start_pilot() end_start_pilot = time.time() time_measures["Pilot Submission"]=end_start_pilot-start logger.debug("Started pilot in %.2f sec"%time_measures["Pilot Submission"]) #############################################################################
# Record how long the pilot start-up took.
end_start_pilot = time.time()
# NOTE(review): `start_pilot` has to be a start timestamp for this subtraction
# to work -- confirm it is not the pilot-launch function of the same name.
time_measures["Pilot Submission"] = end_start_pilot - start_pilot
logger.debug("Started pilot in %.2f sec" % time_measures["Pilot Submission"])

# ---------------------------------------------------------------------------
# Coordination backend handed to every data unit created below.
df = InMemoryCoordination(flushdb=True, pilot=pilot, hostname=INMEM_COORDINATION_HOST)

for r in range(RUNS):
    # `ex` is kept as the index because it also selects the matching entry
    # in `clusters`.
    for ex, input_file in enumerate(inputFiles):
        start = time.time()
        logger.debug("Start KMeans for input file %s, rep: %s" % (input_file, r))

        # Read the raw input lines; the context manager closes the file even
        # if reading raises.
        with open(input_file) as f:
            points = f.readlines()
        number_of_data_points = len(points)

        du_points = DistributedInMemoryDataUnit(name="Points", coordination=df)
        du_points.load(points)

        # Seed the centers with the first clusters[ex] input lines.
        centers = points[:clusters[ex]]
        du_centers = DistributedInMemoryDataUnit(name="Centers", coordination=df)
        du_centers.load(centers)
        number_of_centroids_points = len(centers)

        end_data_load = time.time()
        # NOTE(review): measured from end_start_pilot rather than this pass's
        # `start`, so the value also spans everything executed since pilot
        # start-up (including earlier loop passes) -- confirm this is intended.
        time_measures["DataLoadTime"] = end_data_load - end_start_pilot

        total_map_time = 0
        total_reduce_time = 0
        for iteration in range(NUM_ITERATIONS):
            iteration_start = time.time()