# Imports assumed by the helpers below; `analysis` is the project-local
# trace-analysis module used throughout this file.
from os import path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

import analysis


def getMetaData(explist):
    # Get DataFrame of all experiments and keep only the Cori runs
    df_All = analysis.getAllExperiments()
    df_Cori = df_All.loc[df_All['Experiment'].isin(explist)]

    # Metadata table: one row per Cori run
    df_Cori_Meta = pd.DataFrame(columns=['Workload', 'Ranks', 'Stencil', 'Runtime'])

    # Loop over all Cori runs
    for run in range(len(df_Cori)):
        currentRun = df_Cori.iloc[run]
        eid = currentRun['Experiment']
        workload = currentRun['workload']
        ranks = currentRun['processors']
        stencil = currentRun['stencil_size']
        rab_work = currentRun['rabbit_workload']
        rid = currentRun['expid']
        currentPath = './mlruns/' + str(eid) + '/' + str(rid) + '/artifacts/bsp-trace.json'

        # Print progress
        print(eid, currentPath)

        # Only keep runs with the rabbit workload turned off
        if rab_work == 0:
            # Load the trace and keep rank 0 only
            currentData = analysis.getData(currentPath)
            currentData = currentData[currentData['rank'] == 0]

            # Total runtime in seconds (trace intervals are in microseconds)
            currentRunTime = currentData['interval_max_usec'].sum() / 1000000

            # Append to the metadata table
            df_Cori_Meta.loc[len(df_Cori_Meta)] = [workload, int(ranks), int(stencil), currentRunTime]

    return df_Cori_Meta
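# A minimal usage sketch (hypothetical experiment IDs and output name): build the
# per-run metadata table once and cache it as the CSV that
# generatePredictionFigure() later reads through its `fname` argument.
#
#     cori_experiments = [2, 3, 5]                # placeholder mlflow experiment IDs
#     df_meta = getMetaData(cori_experiments)
#     df_meta.to_csv('Cori_metadata.csv', index=False)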
def get_formated_data(df_platform_NoStencil, workload, processors, fname):
    # Reuse the cached array if it has already been extracted
    if path.isfile(fname):
        data = np.load(fname)
    else:
        ppn = 32  # ranks per node
        data_platform = analysis.getData(df_platform_NoStencil)
        data_platform = data_platform[['workload', 'node', 'rank', 'iteration',
                                       'iterations', 'workload_usec']]
        # Iteration 0 gives one row per rank, used to map ranks onto nodes
        data_platform_Iteration0 = data_platform[data_platform['iteration'] == 0]
        print(data_platform.head())

        nodes = data_platform['node'].unique()
        node_counter = np.zeros((len(nodes), 1))
        iterations = int(data_platform['iterations'].iloc[0])

        # data[node, rank-on-node, iteration] holds the per-iteration workload time
        data = np.zeros((int(processors / ppn), ppn, iterations))
        for item in range(len(data_platform_Iteration0)):
            cur = data_platform_Iteration0.iloc[item]
            for node in nodes:
                if node == cur['node']:
                    i = int(np.where(nodes == node)[0][0])
                    j = int(node_counter[i])
                    node_counter[i] = node_counter[i] + 1
                    curData = data_platform[data_platform['rank'] == cur['rank']]
                    for k in range(iterations):
                        temp = curData.iloc[k]
                        data[i, j, k] = temp['workload_usec']

        # Cache the extracted array as a .npy file
        np.save(fname, data)
    return data
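# A minimal usage sketch (hypothetical path, workload, and rank count), assuming
# analysis.getData() accepts the same bsp-trace.json path used elsewhere in this
# file. The returned array is indexed as data[node, rank_on_node, iteration] with
# 32 ranks per node hard-coded via `ppn`, so a 512-rank run has shape (16, 32, iterations).
#
#     trace_path = './mlruns/2/abc123/artifacts/bsp-trace.json'   # placeholder run
#     data = get_formated_data(trace_path, 'hpcg', 512, 'hpcg_512_noStencil.npy')
#     slowest_per_iteration = data.max(axis=(0, 1))               # slowest rank each iteration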
def generatePredictionFigure(df_All, workload, baseRanks, CI, fname):
    print('Generating Prediction for: ' + workload)
    outfile1 = 'Figures/Attaway_' + workload + '_prediction'
    outfile2 = 'Figures/Attaway_' + workload + '_prediction_stencil'

    # Measured runtimes at the base rank count, taken from the metadata CSV
    coriData = pd.read_csv(fname)
    workloadData = coriData[(coriData['Workload'] == workload) &
                            (coriData['Ranks'] == baseRanks)]

    kvals = np.array([1, 2, 4, 8, 16])  # scale-up factors to project to
    iterations = 50                     # bootstrap resamples per run
    MILLION = 1000000
    eps = 10 ** -5
    yMin = 0
    yMax = 0
    count = 0

    fig1 = plt.figure(1, figsize=(7, 3))
    fig2 = plt.figure(2, figsize=(7, 3))
    labels = ['No Stencil', 'Stencil']

    # hpcg and lammps have no stencil variant; every other workload gets both
    df_Attaway_NoStencil = df_All[(df_All['machine'] == 'Attaway') &
                                  (df_All['processors'] == baseRanks) &
                                  (df_All['workload'] == workload) &
                                  (df_All['stencil_size'] == 0)]
    if workload == 'hpcg' or workload == 'lammps':
        df_List = [df_Attaway_NoStencil]
    else:
        df_Attaway_Stencil = df_All[(df_All['machine'] == 'Attaway') &
                                    (df_All['processors'] == baseRanks) &
                                    (df_All['workload'] == workload) &
                                    (df_All['stencil_size'] != 0)]
        df_List = [df_Attaway_NoStencil, df_Attaway_Stencil]

    for df in df_List:
        print(labels[count])
        lowerBound = []
        middle = []
        upperBound = []

        # Min, median, and max measured runtimes for this stencil setting
        if count == 0:
            runtimeData = workloadData[workloadData['Stencil'] == 0]['Runtime']
        else:
            runtimeData = workloadData[workloadData['Stencil'] != 0]['Runtime']
        runtimeData = runtimeData.sort_values()
        minRuntime = min(runtimeData)
        medianRuntime = runtimeData.iloc[int(len(runtimeData) / 2)]
        maxRuntime = max(runtimeData)
        print(workload, minRuntime, medianRuntime, maxRuntime)

        for k in kvals:
            expListMin = []
            expListMed = []
            expListMax = []
            for run in range(len(df)):
                currentRun = df.iloc[run]
                eid = currentRun['Experiment']
                rid = currentRun['expid']
                currentPath = './mlruns/' + str(eid) + '/' + str(rid) + '/artifacts/bsp-trace.json'

                currentData = analysis.getData(currentPath)
                currentData = currentData[currentData['rank'] == 0]
                currentRuntime = sum(currentData['interval_max_usec']) / MILLION

                # Only bootstrap the runs whose runtime matches the min, median, or max
                if (abs(currentRuntime - maxRuntime) < eps) or \
                   (abs(currentRuntime - medianRuntime) < eps) or \
                   (abs(currentRuntime - minRuntime) < eps):
                    for i in range(iterations):
                        data = analysis.resample_project(currentData, len(currentData), k,
                                                         col='interval_max_usec')
                        projectedRunTime = sum(data) / MILLION
                        print('Run:', run + 1, '/', len(df),
                              '\tIteration:', i + 1, '/', iterations,
                              '\tWorkload:', workload, '\tk:', k,
                              '\tProjected Runtime:', projectedRunTime)
                        if abs(currentRuntime - minRuntime) < eps:
                            expListMin.append(projectedRunTime)
                        if abs(currentRuntime - medianRuntime) < eps:
                            expListMed.append(projectedRunTime)
                        if abs(currentRuntime - maxRuntime) < eps:
                            expListMax.append(projectedRunTime)

            expListMin.sort()
            expListMed.sort()
            expListMax.sort()

            # Confidence intervals on the projected runtimes for this k
            meanMin = np.mean(np.array(expListMin))
            stvMin = stats.sem(np.array(expListMin))
            meanMed = np.mean(np.array(expListMed))
            stvMed = stats.sem(np.array(expListMed))
            meanMax = np.mean(np.array(expListMax))
            stvMax = stats.sem(np.array(expListMax))
            intervalMin = stats.norm.interval(CI, loc=meanMin, scale=stvMin)
            intervalMed = stats.norm.interval(CI, loc=meanMed, scale=stvMed)
            intervalMax = stats.norm.interval(CI, loc=meanMax, scale=stvMax)
            print(CI, 'confidence interval:', intervalMin[0], intervalMax[1],
                  '\tMedian:', (intervalMed[0] + intervalMed[1]) / 2)

            lowerBound.append(intervalMin[0])
            middle.append((intervalMed[0] + intervalMed[1]) / 2)
            upperBound.append(intervalMax[1])

        # Plot the projected median and confidence band (figure 1: no stencil, figure 2: stencil)
        if count == 0:
            plt.figure(1)
        else:
            plt.figure(2)
        _ = plt.plot(baseRanks * kvals, middle, color='black')
        _ = plt.fill_between(baseRanks * kvals, lowerBound, upperBound,
                             color='blue', alpha=0.9)

        # Track global y-limits across both figures
        if count == 0:
            yMin = min(lowerBound)
            yMax = max(upperBound)
        else:
            yMin = min(yMin, min(lowerBound))
            yMax = max(yMax, max(upperBound))
        count = count + 1

    # Measured runtimes at all rank counts, split by stencil setting
    attawayData = pd.read_csv(fname)
    baseranks0 = []
    baseRuntime0 = []
    ranks0 = []
    currentRuntime0 = []
    baseranks1 = []
    baseRuntime1 = []
    ranks1 = []
    currentRuntime1 = []
    for run in range(len(attawayData)):
        label = ''
        myworkload = attawayData['Workload'][run]
        ranks = attawayData['Ranks'][run]
        stencil = attawayData['Stencil'][run]
        currentRunTime = attawayData['Runtime'][run]
        if int(stencil) > 0:
            label = label + 'Stencil'

        # No-stencil runs
        if label == '':
            if myworkload == workload:
                if ranks == baseRanks:
                    baseranks0.append(int(ranks))
                    baseRuntime0.append(currentRunTime)
                    print('No Stencil - BaseRanks', workload, ranks, currentRunTime)
                else:
                    ranks0.append(int(ranks))
                    currentRuntime0.append(currentRunTime)
                    print('No Stencil - Other', workload, ranks, currentRunTime)

        # Stencil runs (only for workloads that have a stencil variant)
        if not (workload == 'hpcg' or workload == 'lammps'):
            if label == 'Stencil':
                if myworkload == workload:
                    if ranks == baseRanks:
                        baseranks1.append(int(ranks))
                        baseRuntime1.append(currentRunTime)
                        print('Stencil - BaseRanks', workload, ranks, currentRunTime)
                    else:
                        ranks1.append(int(ranks))
                        currentRuntime1.append(currentRunTime)
                        print('Stencil - Other', workload, ranks, currentRunTime)

    print(baseranks0)
    print(baseRuntime0)
    print(ranks0)
    print(currentRuntime0)
    print(baseranks1)
    print(baseRuntime1)
    print(ranks1)
    print(currentRuntime1)

    # The sleep workload is plotted under its ftq name
    if workload == 'sleep':
        w = 'ftq'
    else:
        w = workload

    # Figure 1: no-stencil measurements on top of the projected confidence band
    plt.figure(1)
    _ = plt.plot(baseranks0, baseRuntime0, 'o', color='black', label='Sample workload')
    _ = plt.plot(ranks0, currentRuntime0, 'o', color='red', label='Scaled-up workload')
    _ = plt.ylim(0.95 * yMin, 1.05 * yMax)
    _ = plt.title('Attaway ' + w + ' - per run bootstrap, global CIs')
    _ = plt.xlabel('Number of Ranks')
    _ = plt.ylabel('Runtime (s)')
    _ = plt.legend(loc='lower right', borderaxespad=0.5)
    plt.tight_layout()
    fig1.savefig(outfile1)
    plt.close(fig1)

    # Figure 2: stencil measurements, skipped for workloads without a stencil variant
    if workload != 'hpcg' and workload != 'lammps':
        plt.figure(2)
        _ = plt.plot(baseranks1, baseRuntime1, 'o', color='black', label='Sample workload')
        _ = plt.plot(ranks1, currentRuntime1, 'o', color='red', label='Scaled-up workload')
        _ = plt.ylim(0.95 * yMin, 1.05 * yMax)
        _ = plt.title('Attaway ' + w + ' - per run bootstrap, global CIs - Stencil')
        _ = plt.xlabel('Number of Ranks')
        _ = plt.ylabel('Runtime (s)')
        _ = plt.legend(loc='lower right', borderaxespad=0.5)
        plt.tight_layout()
        fig2.savefig(outfile2)
        plt.close(fig2)
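# A minimal driver sketch (hypothetical workload list, rank count, CI level, and
# CSV name): regenerate the Attaway prediction figures for each workload from the
# cached metadata table produced by getMetaData().
#
#     df_All = analysis.getAllExperiments()
#     for wl in ['hpcg', 'lammps', 'sleep']:                      # placeholder workloads
#         generatePredictionFigure(df_All, wl, baseRanks=512, CI=0.95,
#                                  fname='Attaway_metadata.csv')  # placeholder CSV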