def plot_x_ranks(ax, als_df):
    """Draw a short green marker on ``ax`` where each rank 1..20 first occurs.

    The x position of a marker is the cumulative FLOP count (as returned by
    ``extract_als_data``) at the first entry whose rank equals that value.

    Args:
        ax: Matplotlib axes to draw on.
        als_df: ALS measurements, forwarded unchanged to ``extract_als_data``.

    Raises:
        ValueError: If any rank in 1..20 does not appear in the data.
    """
    _, _, flops, ranks, _ = extract_als_data(als_df)
    cumulative_flops = np.cumsum(flops)
    rank_list = list(ranks)

    # Resolve every position before drawing so a missing rank fails before
    # anything is added to the axes.
    marker_positions = [
        cumulative_flops[rank_list.index(rank)] for rank in range(1, 21)
    ]
    for position in marker_positions:
        # ymax=0.04 keeps the marker as a small tick at the bottom of the axes.
        ax.axvline(position, ymax=0.04, c='g', linewidth=linewidth)
def speedup(dic):
    """Print ALS, CALS and (when present) CUDA timings and their speedups.

    CALS and CUDA are each summarised twice: by the maximum of the ``TOTAL``
    column ("To"/"t") and by the sum of the ``ITERATION`` column ("It"/"i").
    The ALS time is the sum of the first item returned by
    ``extract_als_data``.

    Args:
        dic: Mapping with the keys ``'alsdata'``, ``'calsdata'`` and
            ``'ccalsdata'``. The CUDA lines are printed only when
            ``dic['ccalsdata']`` is a ``pandas.DataFrame``.
    """
    als = dic['alsdata']
    cals = dic['calsdata']
    ccals = dic['ccalsdata']
    has_cuda = isinstance(ccals, pd.DataFrame)

    tcals = cals['TOTAL'].max()
    icals = cals['ITERATION'].sum()

    # Only the first returned item is needed. Indexing (instead of unpacking
    # into a fixed number of names) avoids the ValueError the previous
    # four-name unpacking raised; other call sites in this file unpack five.
    ttime = extract_als_data(als)[0]
    tals = np.sum(ttime)

    print('ALS time: {}'.format(tals))
    print('CALS time: It: {} To:{}'.format(icals, tcals))
    print('ALS v CALS(t) Speedup: ', '{:.2f}'.format(tals / tcals))
    print('ALS v CALS(i) Speedup: ', '{:.2f}'.format(tals / icals))

    if has_cuda:
        # Read the CUDA figures from the CUDA frame; they used to be taken
        # from the CPU frame, which made both CUDA speedups always 1.00.
        tccals = ccals['TOTAL'].max()
        iccals = ccals['ITERATION'].sum()
        print('CUDA time: It: {} To:{}'.format(iccals, tccals))
        print('CALS v CUDA(t) Speedup: ', '{:.2f}'.format(tcals / tccals))
        print('CALS v CUDA(i) Speedup: ', '{:.2f}'.format(icals / iccals))
def speedup_plot(backend, threads, modes, ax, count):
    """Plot the ALS-over-CALS speedup for 1..20 components on ``ax``.

    For every component count ``r`` the data set ``'speedup_<r>'`` is loaded
    with ``read_data``; the speedup is the summed ALS time divided by the sum
    of the CALS ``ITERATION`` column. When ``threads == 24`` and CUDA data is
    available, a second curve (ALS time over the maximum CUDA ``TOTAL``) is
    drawn and the y axis switches to a log scale.

    Args:
        backend: Forwarded to ``read_data``.
        threads: Thread count; forwarded to ``read_data`` and used for the
            legend label.
        modes: Tensor modes; forwarded to ``read_data``. ``(300, 300, 300)``
            gets the x tick labels/axis label and ``(100, 100, 100)`` gets
            the legend.
        ax: Matplotlib axes to draw on.
        count: Index into the module-level ``colors`` sequence.

    Returns:
        The axes that were passed in.
    """
    x = np.arange(1, 21, 1)
    y = []
    yc = []
    for r in range(1, 21):
        dic = read_data(backend, threads, modes, 'speedup_{}'.format(r))
        als = dic['alsdata']
        cals = dic['calsdata']
        cals_cuda = dic['calscudadata']

        tcals = cals['ITERATION'].sum()
        logger.info("Total CALS time: {}".format(tcals))

        ttime, _, _, _, _ = extract_als_data(als)
        tals = np.sum(ttime)
        logger.info("Total ALS time: {}".format(tals))

        y.append(tals / tcals)
        if threads == 24 and isinstance(cals_cuda, pd.DataFrame):
            tccals = cals_cuda['TOTAL'].max()
            yc.append(tals / tccals)

    if threads == 1:
        label = '1 thread'
    else:
        label = '{} threads'.format(threads)

    if not yc:
        # Linear axis: ticks at 1, 3, 5, ... up to just above the peak.
        peak = np.max(np.array(y))
        yticks = np.array([1] + list(np.arange(3, peak + 3, 2)))
        # Several series may share these axes; only ever grow the y range.
        # NOTE(review): the original indentation was lost; set_yticklabels
        # and set_ylim are assumed to belong to this guard - confirm.
        if np.max(yticks) > np.max(np.array(ax.get_yticks())):
            ax.set_yticks(yticks)
            ax.set_yticklabels([str(i) for i in list(yticks)])
            ax.set_ylim([0, peak + 0.1 * peak])
    else:
        # CUDA curve present: fixed log axis.
        ax.set_yscale('log')
        yticks = [1, 10, 100]
        ax.set_yticks(yticks)
        ax.set_yticklabels([str(i) for i in yticks])
        ax.set_ylim([0.9, 110])
    ax.set_ylabel('Speedup')

    # NOTE(review): the original indentation was lost; all x-axis settings
    # are assumed to apply only to the (300, 300, 300) plot - confirm.
    if modes == (300, 300, 300):
        xticks = [1, 5, 10, 15, 20]
        ax.set_xticks(xticks)
        ax.set_xticklabels([str(i) for i in list(xticks)])
        ax.set_xlabel('Components')

    # ``dic`` is the data set loaded in the last loop iteration.
    ax.set_title(modes_title_string(dic['modes']))
    # Pass the flag positionally: the ``b`` keyword was renamed to
    # ``visible`` and later removed from Matplotlib.
    ax.grid(True, which='both', axis='y')

    ax.plot(x, y, '-o', color=colors[count], label=label,
            markersize=markersize, linewidth=linewidth)
    if yc:
        ax.plot(x, yc, '-o', color='C2', label='CUDA',
                markersize=markersize, linewidth=linewidth)

    if modes == (100, 100, 100):
        ax.legend(ncol=2)
    return ax
def performance_plot_both(dic, ax=None, print_all=False):
    """Plot the efficiency of CALS, ALS, OMP ALS and TTB over cumulative FLOPs.

    Each curve is ``FLOPs / time / mfps`` where ``mfps`` is looked up in
    ``CPU_FPS`` by thread count. The x axis is the cumulative FLOP count with
    ticks labelled as fractions of the CALS total. A summary of the FLOP
    counts and timings is printed to stdout.

    Args:
        dic: Mapping with the keys ``'alsdata'``, ``'alsompdata'``,
            ``'calsdata'``, ``'ttbdata'`` and ``'modes'``; ``'backend'`` and
            ``'threads'`` are additionally read when the figure is saved.
        ax: Axes to draw on. When ``None`` a new figure is created and saved
            under ``plot_output_path`` once plotting is finished.
        print_all: When true, draw the tick labels, legend, y label and title
            regardless of the modes/thread-count conditions.
    """
    als_df = dic['alsdata']
    als_omp = dic['alsompdata']
    cals_df = dic['calsdata']
    ttb_l = dic['ttbdata']
    modes = dic['modes']

    # Remember whether the figure is ours: ``ax`` is rebound below, so
    # testing ``ax is None`` at the end could never trigger the save.
    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(1, 1)
        fig.set_size_inches(w=4.68596, h=3.5)

    threads = str(cals_df['NUM_THREADS'][0])
    mfps = CPU_FPS[threads]
    gemm = GEMM[modes_string(modes)][threads]

    ttime, ctime, flops, ranks, ittime = extract_als_data(als_df)
    flop_cumsum_als = np.cumsum(flops)

    ttime_omp, ctime_omp, flops_omp, ranks_omp, ittime_omp = extract_als_data(
        als_omp)
    flop_cumsum_als_omp = np.cumsum(flops_omp)

    flop_cumsum_cals = cals_df['FLOPS'].cumsum()

    print()
    # "Total" is the maximum of TOTAL and "Iteration sum" the sum of
    # ITERATION; the two values used to be passed in swapped order.
    print(
        'CALS Flops: {:>14}, Total: {:>8.2f}, Iteration sum: {:>8.2f}'.format(
            list(flop_cumsum_cals)[-1], cals_df['TOTAL'].max(),
            cals_df['ITERATION'].sum()))
    print('OALS Flops: {:>14}, Total: {:>8.2f}'.format(
        list(flop_cumsum_als_omp)[-1], ttime_omp.max()))
    # NOTE(review): the ALS values may be swapped relative to their labels in
    # the same way the CALS ones were; the meaning of ``ittime``/``ttime`` is
    # not visible here, so the order is left untouched - confirm.
    print(
        ' ALS Flops: {:>14}, Total: {:>8.2f}, Iteration sum: {:>8.2f}'.format(
            list(flop_cumsum_als)[-1], ittime.sum(), ttime.sum()))
    print()

    ax.step(flop_cumsum_cals,
            cals_df['FLOPS'] / cals_df['ITERATION'] / mfps,
            '-', label='CALS', color='C0',
            markersize=markersize, linewidth=linewidth)

    print('{} {} {} {}'.format(flops_omp[-1], ttime_omp.max(), mfps,
                               flops_omp[-1] / ttime_omp.max() / mfps))
    if threads != '1':
        # OMP ALS is drawn as one flat line at its overall efficiency.
        val = flop_cumsum_als_omp[-1] / ttime_omp.max() / mfps
        ax.step([flop_cumsum_als_omp[0], flop_cumsum_als_omp[-1]],
                [val, val],
                '-', label='OMP ALS', color='C6',
                markersize=markersize, linewidth=linewidth)

    ax.step(flop_cumsum_als, flops / ttime / mfps,
            '-', label='ALS', color='C1',
            markersize=markersize, linewidth=linewidth)

    if ttb_l:
        ax.step(flop_cumsum_als, flops / np.array(ttb_l) / mfps,
                '-', label='TTB', color='C4',
                markersize=markersize, linewidth=linewidth)

    plot_gemm(gemm, ax, flop_cumsum_als)

    # Label the x axis by the fraction of the total CALS computation.
    flop_cumsum_cals = np.array(flop_cumsum_cals)
    total_flops = flop_cumsum_cals[-1]
    ax.set_xticks([0, 0.33 * total_flops, 0.66 * total_flops, total_flops])
    if threads == '24' or print_all:
        ax.set_xticklabels(['0', '.33', '.66', '1'])

    # NOTE(review): in the collapsed source the guard
    # ``if (dic['modes'] == (200, 200, 200)) and (threads == '1'):`` is
    # commented out while this call appears live - confirm it should run
    # unconditionally.
    plot_x_ranks(ax, als_df)

    if ((modes == (100, 100, 100) or modes == (299, 301, 41))
            and threads == '12') or print_all:
        ax.legend()

    # Only the (100, 100, 100) plots keep their y label and tick labels ...
    if modes == (100, 100, 100) or print_all:
        ax.set_ylabel('Efficiency (Threads: {})'.format(threads))
    else:
        ax.tick_params(labelleft=False, left=True)

    # ... except (299, 301, 41), which always gets them back.
    if modes == (299, 301, 41):
        ax.set_ylabel('Efficiency (Threads: {})'.format(threads))
        ax.tick_params(labelleft=True, left=True)
    elif threads == '24':
        ax.set_xlabel('Total computation')

    if threads == "1" or print_all:
        ax.set_title(modes_title_string(modes))

    # Leave a 2% margin on both sides of the x range.
    ax.set_xlim([-0.02 * total_flops, total_flops + 0.02 * total_flops])
    ax.set_ylim([0, 1])
    ax.set_yticks(ticks=np.arange(0, 1.1, step=0.1))
    ax.grid(True, axis='y')

    if owns_figure:
        fig.savefig(plot_output_path + 'ALS_v_CALS_' + dic['backend']
                    + '_modes_' + modes_string(modes)
                    + '_threads_' + str(dic['threads']) + fig_format)
columns = {'TTB': [], 'CP-ALS': [], 'OMP ALS': [], 'CALS': []} df = pd.DataFrame(index=index, columns=columns) for th in index: dic = read_data(backend, th, modes) als = dic['alsdata'] als_cuda = dic['alscudadata'] als_omp = dic['alsompdata'] als_omp_cuda = dic['alsompcudadata'] cals = dic['calsdata'] cals_cuda = dic['calscudadata'] ttb = dic['ttbdata'] df.at[th, 'TTB'] = np.sum(ttb) ttime, _, _, _, _ = extract_als_data(als) df.at[th, 'CP-ALS'] = np.sum(ttime) tcals = cals['TOTAL'].max() df.at[th, 'CALS'] = tcals if th == 24: ttime_omp, _, _, _, _ = extract_als_data(als_omp) df.at[th, 'OMP ALS'] = np.max(ttime_omp) if th == 24 and isinstance(als_cuda, pd.DataFrame): ttime_cu, _, _, _, _ = extract_als_data(als_cuda) df.at['CUDA', 'CP-ALS'] = np.sum(ttime_cu) if th == 24 and isinstance(als_omp_cuda, pd.DataFrame): ttime_omp_cu, _, _, _, _ = extract_als_data(als_omp_cuda)