Example #1
0
                             "number_of_processes": 2,                             
                             "working_directory": os.getcwd() + "/work/",
                             }    
    pilot = pilot_compute_service.create_pilot(pilot_compute_description=pilot_compute_description)
    return pilot


# Benchmark configuration constants.

# File-name prefix for the performance results (presumably completed with a
# run-specific suffix by the code that writes the file -- confirm there).
PERFORMANCE_DATA_FILE = "DIDU-kmeans-results-"

# Names of the descriptive fields attached to a measurement
# (presumably the leading columns of the results file -- verify).
FIELDS = [
    "NumberPoints",
    "Pilot",
    "KMeansImpl",
    "RunTimestamp",
    "Type",
]

# Directory name used for result output.
RESULT_DIR = "results"

# Number of iterations performed per run.
NUM_ITERATIONS = 2

###################################################################################################
if __name__ == '__main__':
    # Script entry point: call flushdb() on the data-unit class before any
    # timing starts (presumably resets previously stored state), then set up
    # the bookkeeping for this run.
    DistributedInMemoryDataUnit.flushdb()
    run_timestamp = datetime.datetime.now()
    time_measures = {}

    # ---- Phase: pilot submission (timed) ------------------------------------
    start = time.time()
    # NOTE(review): start_pilot() below is called without arguments, so this
    # description is not handed to it here -- confirm whether it is still
    # needed. It is kept because code after this section may read it.
    pilot_compute_description = {
        "service_url": 'fork://localhost',
        "number_of_processes": 2,
        "working_directory": os.getcwd() + "/work/",
    }
    pilot = start_pilot()
    end_start_pilot = time.time()

    # Elapsed wall-clock seconds between `start` and the return of
    # start_pilot(); stored under its own key and logged at debug level.
    time_measures["Pilot Submission"] = end_start_pilot - start
    logger.debug(
        "Started pilot in %.2f sec" % time_measures["Pilot Submission"]
    )
    # ---- End of pilot submission phase --------------------------------------
Example #2
0
                             "number_of_processes": 2,                             
                             "working_directory": os.getcwd() + "/work/",
                             }    
    pilot = pilot_compute_service.create_pilot(pilot_compute_description=pilot_compute_description)
    return pilot


# Benchmark configuration constants.

# File-name prefix for the performance results (presumably completed with a
# run-specific suffix by the code that writes the file -- confirm there).
PERFORMANCE_DATA_FILE = "DIDU-kmeans-results-"

# Names of the descriptive fields attached to a measurement
# (presumably the leading columns of the results file -- verify).
FIELDS = [
    "NumberPoints",
    "Pilot",
    "KMeansImpl",
    "RunTimestamp",
    "Type",
]

# Directory name used for result output.
RESULT_DIR = "results"

# Number of iterations performed per run.
NUM_ITERATIONS = 2

###################################################################################################
if __name__ == '__main__':
    # Script entry point: call flushdb() on the data-unit class before any
    # timing starts (presumably resets previously stored state), then set up
    # the bookkeeping for this run.
    DistributedInMemoryDataUnit.flushdb()
    run_timestamp = datetime.datetime.now()
    time_measures = {}

    # ---- Phase: pilot submission (timed) ------------------------------------
    start = time.time()
    # NOTE(review): start_pilot() below is called without arguments, so this
    # description is not handed to it here -- confirm whether it is still
    # needed. It is kept because code after this section may read it.
    pilot_compute_description = {
        "service_url": 'fork://localhost',
        "number_of_processes": 2,
        "working_directory": os.getcwd() + "/work/",
    }
    pilot = start_pilot()
    end_start_pilot = time.time()

    # Elapsed wall-clock seconds between `start` and the return of
    # start_pilot(); stored under its own key and logged at debug level.
    time_measures["Pilot Submission"] = end_start_pilot - start
    logger.debug(
        "Started pilot in %.2f sec" % time_measures["Pilot Submission"]
    )
    # ---- End of pilot submission phase --------------------------------------
            # Record and log the elapsed time for pilot startup.
            # NOTE(review): `start_pilot` is subtracted from a time.time()
            # value, so it must hold a timestamp here; its assignment is
            # outside this excerpt -- confirm it is not a function name.
            end_start_pilot = time.time()
            time_measures["Pilot Submission"]=end_start_pilot-start_pilot
            logger.debug("Started pilot in %.2f sec"%time_measures["Pilot Submission"])
            #############################################################################
            
            # Coordination object passed to both data units below; it is
            # constructed with flushdb=True for the pilot started above.
            df = InMemoryCoordination(flushdb=True, pilot=pilot, hostname=INMEM_COORDINATION_HOST)
    
            # Repeat the experiment RUNS times over every input file; `ex`
            # indexes inputFiles and clusters in parallel.
            for r in range(RUNS):                    
                for ex in range(len(inputFiles)):
                    start = time.time()
                    logger.debug("Start KMeans for input file %s, rep: %s"%(inputFiles[ex], r))
                    # Read the whole input file; every line becomes one entry
                    # of `points`.
                    # NOTE(review): the file is closed manually, so it stays
                    # open if readlines() raises -- a `with` block would
                    # avoid that.
                    f = open(inputFiles[ex])
                    points = f.readlines()
                    f.close()
                    number_of_data_points=len(points)    
                    du_points = DistributedInMemoryDataUnit(name="Points", coordination=df)
                    du_points.load(points)    
                    # The first clusters[ex] lines of the file serve as the
                    # initial centers.
                    centers = points[:clusters[ex]]
            
                    du_centers = DistributedInMemoryDataUnit(name="Centers", coordination=df)
                    du_centers.load(centers)
                    number_of_centroids_points=len(centers)  

                    end_data_load = time.time()
                    # NOTE(review): measured from end_start_pilot, not from
                    # this iteration's `start`; for every pass after the first
                    # the interval therefore also spans the earlier passes --
                    # confirm this is intended.
                    time_measures["DataLoadTime"] = end_data_load-end_start_pilot
        
                    # Reset to zero before the iteration loop (presumably
                    # accumulated per iteration further down -- not visible
                    # in this excerpt).
                    total_map_time = 0
                    total_reduce_time = 0
                    for iteration in range(0,NUM_ITERATIONS):
                        iteration_start = time.time()