def send_rank_and_rewards(base_url, app, local_fp, feed, iter_num=1000, time_sleep=0.05, verbose=False):
    """Send `iter_num` rank requests, each followed by a reward request, and log every exchange.

    For every iteration a GET is issued to `<base_url>/<app>/rank/<feed>`.
    When the rank response looks valid (status 200, `x-msdecision-src` header
    equal to `site://<app>`, and `rewardAction` present in the body) the
    eventId is extracted from the body and a reward of `i + .36` is POSTed to
    `<base_url>/<app>/reward/<eventId>`. One tab-separated line per request is
    appended to `local_fp`.

    Args:
        base_url: service base URL; a trailing '/' is added when missing.
        app: application name appended to the base URL.
        local_fp: path of the local log file (opened in append mode).
        feed: feed name appended to the rank URL.
        iter_num: number of rank requests to send.
        time_sleep: seconds to sleep after each iteration.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        List of eventIds (str) extracted from the successful rank responses.
    """
    print('Sending rank/reward requests...')
    if not base_url.endswith('/'):
        base_url += '/'
    url = base_url + app
    rank_url = url + '/rank/' + feed
    site_str = 'site://' + app

    # os.makedirs('') raises FileNotFoundError, so only create the parent
    # directory when local_fp actually has a directory component.
    log_dir = os.path.dirname(local_fp)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    eventIds = []
    err = [0, 0]  # [rank errors, reward errors]
    # The session is used as a context manager so its connections are released
    # even when a request or a file write raises.
    with requests.Session() as s, open(local_fp, 'a') as f:
        s.headers.update({'Content-Type': 'application/json', 'Accept': 'application/json'})
        for i in range(1, iter_num + 1):
            r = s.get(rank_url)
            f.write('url:{}\tstatus_code:{}\theaders:{}\tcontent:{}\n'.format(
                rank_url, r.status_code, r.headers, r.content))

            rank_ok = (r.status_code == 200
                       and r.headers.get('x-msdecision-src', None) == site_str
                       and b'rewardAction' in r.content)
            if not rank_ok:
                err[0] += 1
                print('Rank Error - status_code: {}; headers: {}; content: {}'.format(
                    r.status_code, r.headers, r.content))
            else:
                eventId = str(ds_parse.extract_field(r.content, b'eventId":"', b'","'), 'utf-8')
                eventIds.append(eventId)

                reward = i + .36
                reward_url = url + '/reward/' + eventId
                r2 = s.post(reward_url, json=reward)
                # The reward log line records the value sent, not the response body.
                f.write('url:{}\tstatus_code:{}\theaders:{}\tcontent:{}\n'.format(
                    reward_url, r2.status_code, r2.headers, reward))
                if r2.status_code != 200 or r2.headers.get('x-msdecision-src', None) != site_str:
                    err[1] += 1
                    print('Reward Error - status_code: {}; headers: {}'.format(
                        r2.status_code, r2.headers))

            update_progress(i, iter_num, 'Errors: {}'.format(err))
            time.sleep(time_sleep)
    return eventIds
def send_rank_and_rewards(base_url, app, local_fp, feed=None, iter_num=1000, time_sleep=0.05, verbose=False):
    """Send `iter_num` rank requests, each followed by a reward request, and log every exchange.

    When `feed` is given, each rank request is a GET to
    `<base_url>/<app>/rank/<feed>`. Otherwise each rank request is a POST to
    `<base_url>/<app>/rank/` whose body cycles through three built-in JSON
    decision contexts. When the rank response looks valid (status 200,
    `x-msdecision-src` header equal to `site://<app>`, and `rewardAction`
    present in the body) the eventId is extracted and a reward of `i + .36`
    is POSTed to `<base_url>/<app>/reward/<eventId>`. One tab-separated line
    per request is appended to `local_fp`.

    Args:
        base_url: service base URL; a trailing '/' is added when missing.
        app: application name appended to the base URL.
        local_fp: path of the local log file (opened in append mode).
        feed: optional feed name; falsy means "POST the built-in contexts".
        iter_num: number of rank requests to send.
        time_sleep: seconds to sleep after each iteration.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        List of eventIds (str) extracted from the successful rank responses.
    """
    if not base_url.endswith('/'):
        base_url += '/'
    url = base_url + app
    rank_url = url + '/rank/'
    if feed:
        rank_url += feed
    else:
        # Request bodies used in round-robin order when no feed is supplied.
        context_list = [
            """{"decisions":[{"shared":{"features":[{"user":{"name":"Doug"},"object":{"color":"brown","weigth":"light","temp":70}}]},"actions":[{"ids":[{"id":"bread"}]},{"ids":[{"id":"dog"}]},{"ids":[{"id":"box"}]},{"ids":[{"id":"envelope"}]}]}]}""",
            """{"decisions": [{"shared": {"features": [{"a": {"Gender": "female"},"b": {"Location": "New York"}}]},"actions": [{"ids": [{"id": "action3"}]},{"ids": [{"id": 1,"this creates":"'constant:1' feature for marginal"}],"this": {"doesn't get logged": "because is outside of 'features'"},"features": [{"missing namespace": "works anyway, since ns 'j' is added by rest API","float":{"duration":160.6},"int":{"parts":5}}]},{"ids": [{"id": 2}],"features": [{"ns": {"name": "action2"}}]}]}]}""",
            """{"decisions":[{"shared":{"features":[{"user":{"name":"Doug"},"object":{"color":"brown","weigth":"light","temp":70,"jack":[5,4,6],"jack2":{},"jack3":{"j":5,"g":4,"p":3}}}]},"actions":[{"ids":[{"id":"bread"}]},{"ids":[{"id":"dog"}]},{"ids":[{"id":"box"}]},{"ids":[{"id":"envelope"}]}]}]}""",
        ]

    print('Sending rank/reward requests to url: {}'.format(rank_url))
    site_str = 'site://' + app

    # os.makedirs('') raises FileNotFoundError, so only create the parent
    # directory when local_fp actually has a directory component.
    log_dir = os.path.dirname(local_fp)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    eventIds = []
    err = [0, 0]  # [rank errors, reward errors]
    # The session is used as a context manager so its connections are released
    # even when a request or a file write raises.
    with requests.Session() as s, open(local_fp, 'a') as f:
        s.headers.update({'Content-Type': 'application/json', 'Accept': 'application/json'})
        for i in range(1, iter_num + 1):
            if feed:
                r = s.get(rank_url)
            else:
                r = s.post(rank_url, context_list[i % len(context_list)])
            f.write('url:{}\tstatus_code:{}\theaders:{}\tcontent:{}\n'.format(
                rank_url, r.status_code, r.headers, r.content))

            rank_ok = (r.status_code == 200
                       and r.headers.get('x-msdecision-src', None) == site_str
                       and b'rewardAction' in r.content)
            if not rank_ok:
                err[0] += 1
                print('Rank Error - status_code: {}; headers: {}; content: {}'.format(
                    r.status_code, r.headers, r.content))
            else:
                eventId = str(ds_parse.extract_field(r.content, b'eventId":"', b'","'), 'utf-8')
                eventIds.append(eventId)

                reward = i + .36
                reward_url = url + '/reward/' + eventId
                r2 = s.post(reward_url, json=reward)
                # The reward log line records the value sent, not the response body.
                f.write('url:{}\tstatus_code:{}\theaders:{}\tcontent:{}\n'.format(
                    reward_url, r2.status_code, r2.headers, reward))
                if r2.status_code != 200 or r2.headers.get('x-msdecision-src', None) != site_str:
                    err[1] += 1
                    print('Reward Error - status_code: {}; headers: {}'.format(
                        r2.status_code, r2.headers))

            update_progress(i, iter_num, 'Errors: {}'.format(err))
            time.sleep(time_sleep)
    return eventIds
def get_prediction_prob(a0, pred_line):
    """Parse the probability assigned to the logged action from a prediction line.

    Args:
        a0: logged action, 0-indexed.
        pred_line: one line of the prediction file. Either a single action
            index (as in --cb_adf -p) or `action:prob` entries, comma
            separated when there are several (as in --cb_explore_adf -p).

    Returns:
        1 or 0 for single-action lines (match / no match), 1 for a
        single `action:prob` entry when a0 is 0, otherwise the float parsed
        from the entry belonging to a0.

    Raises:
        SystemExit: when the line holds a single `action:prob` entry but a0
            is not 0.
    """
    if ':' not in pred_line:
        # prediction file has only one action (as in --cb_adf -p)
        return 1 if a0 == int(pred_line) else 0

    # prediction file has pdf of all actions (as in --cb_explore_adf -p)
    if ',' in pred_line:
        # The first entry has no leading comma, so it needs its own separator.
        if pred_line.startswith(str(a0) + ':'):
            sep = ':'
        else:
            sep = ',' + str(a0) + ':'
        return float(ds_parse.extract_field(pred_line, sep, ','))

    # A single `action:prob` entry: only action 0 can match.
    if a0 == 0:
        return 1

    # The message previously had three placeholders for two arguments, which
    # raised IndexError before the error could be reported.
    print('Error: Prediction action (0) does not match log file action ({}) - pred: {}'.format(a0, pred_line))
    sys.exit()
def print_stats(local_fp, azure_path, verbose=False, plot_hist=False, hist_bin=100):
    """Compare the local rank/reward log with the events found in Azure log files and print a report.

    The local log (`local_fp`) is scanned for successful rank and reward
    lines; Azure files are scanned for lines starting with `{"_label_cost":`.
    Events are matched by eventId and the report lists duplicates, events
    missing on either side, and events whose local reward plus Azure cost is
    not (approximately) zero.

    Args:
        local_fp: path of the local log file written by the rank/reward sender.
        azure_path: a single Azure log file, or a directory that is walked
            with `scantree` for files whose name ends with '.json'.
        verbose: also print one line per anomaly plus the full index lists.
        plot_hist: show histograms of the anomalous request indexes.
        hist_bin: bins argument passed to `plt.hist`.
    """
    t = time.time()

    # gt: eventId -> {'i': 1-based index of the rank line,
    #                 'local_rew': [reward, ...],
    #                 'azure_data': [(cost, ts), ...]}
    gt = {}
    len_local_rank = 0
    dup_rank = 0
    local_rew = []
    lines_errs = 0
    err_codes = collections.Counter()
    bytes_count = 0
    tot_bytes = os.path.getsize(local_fp)
    local_prefix = 'Loading Local file: {} - '.format(local_fp)
    with open(local_fp, encoding='utf-8') as local_file:
        for i, x in enumerate(local_file):
            bytes_count += len(x)
            if (i + 1) % 10000 == 0:
                ds_parse.update_progress(bytes_count, tot_bytes, local_prefix)
            if 'status_code:200' in x:
                if '/rank/' in x and '"eventId":"' in x:
                    ei = ds_parse.local_rank(x)
                    len_local_rank += 1
                    if ei in gt:
                        dup_rank += 1
                    else:
                        gt[ei] = {'i': len_local_rank}
                elif '/reward/' in x and 'content:' in x:
                    ei, r = ds_parse.local_reward(x)
                    local_rew.append((ei, r))
                    # A reward whose rank line is absent cannot be attached to
                    # an event; it still counts in local_rew, so the
                    # intersection figure in the summary exposes it.
                    if ei in gt:
                        gt[ei].setdefault('local_rew', []).append(r)
                else:
                    lines_errs += 1
            else:
                err_codes.update([ds_parse.extract_field(x, 'status_code:', '\t')])
    ds_parse.update_progress(tot_bytes, tot_bytes, local_prefix)

    print('\n\nLoading Azure files...')
    if os.path.isdir(azure_path):
        files = [azure_fp.path for azure_fp in scantree(azure_path) if azure_fp.name.endswith('.json')]
    else:
        files = [azure_path]

    verbose_output = []

    ei_miss_local = 0
    azure_data = []
    for ii, azure_fp in enumerate(files):
        is_gz = azure_fp.endswith('.gz')
        file_prefix = 'File {}/{}: {} - '.format(ii + 1, len(files), azure_fp)
        bytes_count = 0
        tot_bytes = os.path.getsize(azure_fp)
        line_count = 0
        opener = gzip.open if is_gz else open
        with opener(azure_fp, 'rb') as azure_file:
            for line_count, x in enumerate(azure_file, 1):
                bytes_count += len(x)
                if line_count % 10000 == 0:
                    # For .gz files the on-disk size does not match the bytes
                    # read, so progress is reported as a line count instead.
                    if is_gz:
                        ds_parse.update_progress(line_count, prefix=file_prefix)
                    else:
                        ds_parse.update_progress(bytes_count, tot_bytes, file_prefix)

                if x.startswith(b'{"_label_cost":'):
                    data = ds_parse.json_cooked(x)
                    ei = str(data['ei'], 'utf-8')
                    c = str(data['cost'], 'utf-8')
                    azure_data.append((ei, c))
                    if ei not in gt:
                        ei_miss_local += 1
                        if verbose:
                            verbose_output.append(
                                'Idx: {} - EventId: {} - Ranking missing from Local'.format(len(azure_data), ei))
                    else:
                        gt[ei].setdefault('azure_data', []).append((c, data['ts']))
        if is_gz:
            ds_parse.update_progress(line_count, prefix=file_prefix)
        else:
            ds_parse.update_progress(bytes_count, tot_bytes, file_prefix)
        print()
    print()

    dup_azure_counter = collections.Counter()
    dup_rew_counter = collections.Counter()
    err_rewards_idx = []
    no_events_idx = []
    no_rewards_idx = []
    for i, (ei, entry) in enumerate(gt.items()):
        if (i + 1) % 10000 == 0:
            ds_parse.update_progress(i + 1, len(gt), 'Evaluating differences - ')
        idx = entry['i']

        if 'local_rew' not in entry:
            no_rewards_idx.append(idx)
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Reward missing from local'.format(idx, ei))
            continue

        rewards = entry['local_rew']
        if len(rewards) > 1:
            dup_rew_counter.update([len(rewards)])
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Duplicate in Reward: {}'.format(idx, ei, rewards))
            continue

        if 'azure_data' not in entry:
            no_events_idx.append(idx)
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Ranking missing from Azure'.format(idx, ei))
            continue

        azure_rows = entry['azure_data']
        if len(azure_rows) > 1:
            dup_azure_counter.update([len(azure_rows)])
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Duplicate in Azure: {}'.format(idx, ei, azure_rows))
            continue

        # The event is flagged when reward + cost is not ~0, using a relative
        # tolerance with an absolute floor.
        a = float(rewards[0])
        b = float(azure_rows[0][0])
        if abs(a + b) > max(1e-7 * max(abs(a), abs(b)), 1e-6):
            err_rewards_idx.append(idx)
            if verbose:
                verbose_output.append(
                    'Idx: {} - EventId: {} - Error in reward: Local: {} Azure: {}'.format(
                        idx, ei, rewards[0], azure_rows[0]))
    # Report completion from len(gt) rather than the loop variable, which is
    # undefined/stale when gt is empty.
    ds_parse.update_progress(len(gt), len(gt), 'Evaluating differences - ')
    print()

    for x in verbose_output:
        print(x)

    print('\nComputing summary stats...')
    rew_dict = {y[0]: y[1] for y in local_rew}
    azure_dict = {y[0]: y[1] for y in azure_data}

    # An event seen k times contributes k - 1 duplicates.
    dup_azure = sum((x - 1) * dup_azure_counter[x] for x in dup_azure_counter)
    dup_rew = sum((x - 1) * dup_rew_counter[x] for x in dup_rew_counter)
    if verbose:
        print('-----' * 10)
        print('Missing events indexes (1-based indexing)\n{}'.format(no_events_idx))
        print('-----' * 10)
        print('Missing local rewards indexes (1-based indexing)\n{}'.format(no_rewards_idx))
        print('-----' * 10)
        print('Wrong rewards indexes (1-based indexing)\n{}'.format(err_rewards_idx))
    print('-----' * 10)
    print('Events in local_rank: {} (Duplicates: {})'.format(len_local_rank, dup_rank))
    print('Events in local_rew: {} (Duplicates: {} - {})'.format(len(local_rew), dup_rew, dup_rew_counter))
    print('Events in azure_data: {} (Duplicates: {} - {})'.format(len(azure_data), dup_azure, dup_azure_counter))
    print('-----' * 10)
    print('Intersection local_rank/local_rew:', sum(1 for x in rew_dict if x in gt))
    print('Intersection local_rank/azure_data:', sum(1 for x in azure_dict if x in gt))
    print('Missing EventIds from local: {}'.format(ei_miss_local))
    print('Missing EventIds from azure: {}'.format(len(no_events_idx)), end='')
    if no_events_idx:
        print(' (oldest 1-base index: {}/{})'.format(min(no_events_idx), len_local_rank), end='')
    print('\nMissing Local Rewards: {}'.format(len(no_rewards_idx)), end='')
    if no_rewards_idx:
        print(' (oldest 1-base index: {}/{})'.format(min(no_rewards_idx), len_local_rank), end='')
    print('\nWrong rewards: {}'.format(len(err_rewards_idx)))
    print('-----' * 10)
    print('status_codes errors: {}'.format(err_codes.most_common()))
    print('Lines skipped in Local file: {}'.format(lines_errs))
    print('-----' * 10)
    print('Elapsed time: ', time.time() - t)

    if plot_hist:
        if err_rewards_idx or no_events_idx or no_rewards_idx:
            plt.rcParams.update({'font.size': 16})  # General font size
            if err_rewards_idx:
                hist_err = plt.hist(err_rewards_idx, hist_bin, label='Wrong reward', color='xkcd:orange')
                if verbose:
                    print('err_rewards_idx', hist_err)
            if no_events_idx:
                hist_events = plt.hist(no_events_idx, hist_bin, label='No rank', color='xkcd:blue')
                if verbose:
                    print('no_events_idx', hist_events)
            if no_rewards_idx:
                hist_rewards = plt.hist(no_rewards_idx, hist_bin, label='No local reward', color='xkcd:red')
                if verbose:
                    print('no_rewards_idx', hist_rewards)
            plt.title('Missing/Wrong rank and reward requests', fontsize=20)
            plt.xlabel('Request index', fontsize=18)
            plt.ylabel('Bin Count', fontsize=18)
            plt.legend()
            plt.show()
        else:
            print('Nothing to plot! All is good!')
def send_rank_and_rewards(base_url, app, local_fp, feed=None, iter_num=1000, time_sleep=0.05, verbose=False):
    """Issue rank requests and matching reward requests, appending each exchange to a local log.

    With a truthy `feed`, rank requests are GETs to
    `<base_url>/<app>/rank/<feed>`; without one they are POSTs to
    `<base_url>/<app>/rank/` carrying one of three built-in JSON decision
    contexts, chosen by `i % 3`. A rank response is accepted when its status
    is 200, its `x-msdecision-src` header equals `site://<app>` and its body
    contains `rewardAction`; the eventId is then extracted and a reward of
    `i + .36` is POSTed to `<base_url>/<app>/reward/<eventId>`.

    Args:
        base_url: service base URL; a trailing '/' is added when missing.
        app: application name appended to the base URL.
        local_fp: path of the local log file (opened in append mode).
        feed: optional feed name; falsy means "POST the built-in contexts".
        iter_num: number of rank requests to send.
        time_sleep: seconds to sleep after each iteration.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        List of eventIds (str) extracted from the accepted rank responses.
    """
    if not base_url.endswith('/'):
        base_url += '/'
    url = base_url + app
    rank_url = url + '/rank/'
    if feed:
        rank_url += feed
    else:
        # Request bodies used in round-robin order when no feed is supplied.
        context_list = [
            """{"decisions":[{"shared":{"features":[{"user":{"name":"Doug"},"object":{"color":"brown","weigth":"light","temp":70}}]},"actions":[{"ids":[{"id":"bread"}]},{"ids":[{"id":"dog"}]},{"ids":[{"id":"box"}]},{"ids":[{"id":"envelope"}]}]}]}""",
            """{"decisions": [{"shared": {"features": [{"a": {"Gender": "female"},"b": {"Location": "New York"}}]},"actions": [{"ids": [{"id": "action3"}]},{"ids": [{"id": 1,"this creates":"'constant:1' feature for marginal"}],"this": {"doesn't get logged": "because is outside of 'features'"},"features": [{"missing namespace": "works anyway, since ns 'j' is added by rest API","float":{"duration":160.6},"int":{"parts":5}}]},{"ids": [{"id": 2}],"features": [{"ns": {"name": "action2"}}]}]}]}""",
            """{"decisions":[{"shared":{"features":[{"user":{"name":"Doug"},"object":{"color":"brown","weigth":"light","temp":70,"jack":[5,4,6],"jack2":{},"jack3":{"j":5,"g":4,"p":3}}}]},"actions":[{"ids":[{"id":"bread"}]},{"ids":[{"id":"dog"}]},{"ids":[{"id":"box"}]},{"ids":[{"id":"envelope"}]}]}]}""",
        ]

    print('Sending rank/reward requests to url: {}'.format(rank_url))
    site_str = 'site://' + app

    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError; skip directory creation in that case.
    parent_dir = os.path.dirname(local_fp)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    eventIds = []
    err = [0, 0]  # [rank errors, reward errors]
    # Context-managed session: connections are released on normal exit and
    # when a request or file write raises.
    with requests.Session() as s, open(local_fp, 'a') as f:
        s.headers.update({'Content-Type': 'application/json', 'Accept': 'application/json'})
        for i in range(1, iter_num + 1):
            if feed:
                r = s.get(rank_url)
            else:
                r = s.post(rank_url, context_list[i % len(context_list)])
            f.write('url:{}\tstatus_code:{}\theaders:{}\tcontent:{}\n'.format(
                rank_url, r.status_code, r.headers, r.content))

            rank_failed = (r.status_code != 200
                           or r.headers.get('x-msdecision-src', None) != site_str
                           or b'rewardAction' not in r.content)
            if rank_failed:
                err[0] += 1
                print('Rank Error - status_code: {}; headers: {}; content: {}'.format(
                    r.status_code, r.headers, r.content))
            else:
                eventId = str(ds_parse.extract_field(r.content, b'eventId":"', b'","'), 'utf-8')
                eventIds.append(eventId)

                reward = i + .36
                reward_url = url + '/reward/' + eventId
                r2 = s.post(reward_url, json=reward)
                # The reward log line records the value sent, not the response body.
                f.write('url:{}\tstatus_code:{}\theaders:{}\tcontent:{}\n'.format(
                    reward_url, r2.status_code, r2.headers, reward))
                if r2.status_code != 200 or r2.headers.get('x-msdecision-src', None) != site_str:
                    err[1] += 1
                    print('Reward Error - status_code: {}; headers: {}'.format(
                        r2.status_code, r2.headers))

            update_progress(i, iter_num, 'Errors: {}'.format(err))
            time.sleep(time_sleep)
    return eventIds
def print_stats(local_fp, azure_path, verbose=False, plot_hist=False):
    """Compare the local rank/reward log with Azure log files and print a report.

    Successful rank and reward lines are collected from `local_fp`; lines
    starting with `{"_label_cost":` are collected from the Azure file(s).
    For each locally ranked event (in file order, 1-based index) the report
    classifies it as: reward missing locally, event missing from Azure, or
    reward mismatch (local reward plus Azure cost not approximately zero).

    Args:
        local_fp: path of the local log file written by the rank/reward sender.
        azure_path: a single Azure log file, or a directory that is walked
            with `scantree` for files whose name ends with '.json'.
        verbose: also print one line per anomaly plus the full index lists.
        plot_hist: show histograms (50 bins) of the anomalous request indexes.
    """
    print('Computing statistics...')
    local_rank = []
    local_rew = []
    lines_errs = 0
    err_codes = collections.Counter()
    with open(local_fp, encoding='utf-8') as local_file:
        for x in local_file:
            if 'status_code:200' in x:
                if '/rank/' in x and '"eventId":"' in x:
                    local_rank.append(ds_parse.local_rank(x))
                elif '/reward/' in x and 'content:' in x:
                    local_rew.append(ds_parse.local_reward(x))
                else:
                    lines_errs += 1
            else:
                err_codes.update([ds_parse.extract_field(x, 'status_code:', '\t')])

    if os.path.isdir(azure_path):
        files = [azure_fp.path for azure_fp in scantree(azure_path) if azure_fp.name.endswith('.json')]
    else:
        files = [azure_path]

    azure_data = []
    for azure_fp in files:
        with open(azure_fp, 'rb') as azure_file:
            for x in azure_file:
                if x.startswith(b'{"_label_cost":'):
                    data = ds_parse.json_cooked(x)
                    azure_data.append([data['ei'], data['cost']])

    local_rank_set = set(local_rank)
    # Later entries overwrite earlier ones, so the dict sizes also reveal
    # how many duplicates each source contains.
    rew_dict = {y[0]: y[1] for y in local_rew}
    azure_dict = {str(y[0], 'utf-8'): str(y[1], 'utf-8') for y in azure_data}

    err_rewards_idx = []
    no_events_idx = []
    no_rewards_idx = []
    for i, x in enumerate(local_rank):
        if x not in rew_dict:
            no_rewards_idx.append(i + 1)
            if verbose:
                print('Idx: {} - Reward missing from local - EventId: {}'.format(i + 1, x))
        elif x not in azure_dict:
            no_events_idx.append(i + 1)
            if verbose:
                print('Idx: {} - Ranking missing from Azure - EventId: {}'.format(i + 1, x))
        else:
            local_value = float(rew_dict[x])
            azure_value = float(azure_dict[x])
            # Same relative test as abs(1 + azure/local) > 1e-7, written
            # without the division so a local reward of 0 cannot raise
            # ZeroDivisionError.
            if abs(local_value + azure_value) > 1e-7 * abs(local_value):
                if verbose:
                    print('Idx: {} - Error in reward: Local: {} Azure: {} - EventId: {}'.format(
                        i + 1, rew_dict[x], azure_dict[x], x))
                err_rewards_idx.append(i + 1)

    dup_local = len(local_rew) - len(rew_dict)
    dup_azure = len(azure_data) - len(azure_dict)
    if verbose:
        print('-----' * 10)
        print('Missing events indexes (1-based indexing)\n{}'.format(no_events_idx))
        print('-----' * 10)
        print('Missing local rewards indexes (1-based indexing)\n{}'.format(no_rewards_idx))
        print('-----' * 10)
        print('Wrong rewards indexes (1-based indexing)\n{}'.format(err_rewards_idx))
    if dup_local > 0:
        print('-----' * 10)
        print('Duplicates in Local rewards')
        dup_analysis(local_rew)
    if dup_azure > 0:
        print('-----' * 10)
        print('Duplicates in Azure Storage')
        dup_analysis(azure_data)
    print('-----' * 10)
    print('Events in local_rank: {} (Duplicates: {})'.format(len(local_rank), len(local_rank) - len(local_rank_set)))
    print('Events in local_rew: {} (Duplicates: {})'.format(len(local_rew), dup_local))
    print('Events in azure_data: {} (Duplicates: {})'.format(len(azure_data), dup_azure))
    print('-----' * 10)
    print('Intersection local_rank/local_rew:', len(local_rank_set.intersection(rew_dict.keys())))
    print('Intersection local_rank/azure_data:', len(local_rank_set.intersection(azure_dict.keys())))
    print('Missing EventIds: {}'.format(len(no_events_idx)), end='')
    if no_events_idx:
        print(' (oldest 1-base index: {}/{})'.format(min(no_events_idx), len(local_rank)), end='')
    print('\nMissing Local Rewards: {}'.format(len(no_rewards_idx)), end='')
    if no_rewards_idx:
        print(' (oldest 1-base index: {}/{})'.format(min(no_rewards_idx), len(local_rank)), end='')
    print('\nWrong rewards: {}'.format(len(err_rewards_idx)))
    print('-----' * 10)
    print('status_codes errors: {}'.format(err_codes.most_common()))
    print('Lines skipped in Local file: {}'.format(lines_errs))
    print('-----' * 10)

    if plot_hist:
        if err_rewards_idx or no_events_idx or no_rewards_idx:
            plt.rcParams.update({'font.size': 16})  # General font size
            if err_rewards_idx:
                a = plt.hist(err_rewards_idx, 50, label='Wrong reward', color='xkcd:orange')
                if verbose:
                    print('err_rewards_idx', a)
            if no_events_idx:
                b = plt.hist(no_events_idx, 50, label='No rank', color='xkcd:blue')
                if verbose:
                    print('no_events_idx', b)
            if no_rewards_idx:
                c = plt.hist(no_rewards_idx, 50, label='No local reward', color='xkcd:red')
                if verbose:
                    print('no_rewards_idx', c)
            plt.title('Missing/Wrong rank and reward requests', fontsize=20)
            plt.xlabel('Request index', fontsize=18)
            plt.ylabel('Bin Count', fontsize=18)
            plt.legend()
            plt.show()
        else:
            print('Nothing to plot! All is good!')
def print_stats(local_fp, azure_path, verbose=False, plot_hist=False, hist_bin=100):
    """Cross-check the local rank/reward log against Azure log files and print a summary.

    Successful rank and reward lines are read from `local_fp`, and lines
    starting with `{"_label_cost":` are read from the Azure file(s). Events
    are joined on eventId; the summary reports duplicates, events missing
    from either side, and events whose local reward plus Azure cost is not
    (approximately) zero.

    Args:
        local_fp: path of the local log file written by the rank/reward sender.
        azure_path: a single Azure log file, or a directory that is walked
            with `scantree` for files whose name ends with '.json'.
        verbose: also print one line per anomaly plus the full index lists.
        plot_hist: show histograms of the anomalous request indexes.
        hist_bin: bins argument passed to `plt.hist`.
    """
    t = time.time()

    # gt: eventId -> {'i': 1-based index of the rank line,
    #                 'local_rew': [reward, ...],
    #                 'azure_data': [(cost, ts), ...]}
    gt = {}
    len_local_rank = 0
    dup_rank = 0
    local_rew = []
    lines_errs = 0
    err_codes = collections.Counter()
    bytes_count = 0
    tot_bytes = os.path.getsize(local_fp)
    local_prefix = 'Loading Local file: {} - '.format(local_fp)
    # `with` closes the handle that the bare open() in a for-loop leaked.
    with open(local_fp, encoding='utf-8') as local_file:
        for i, x in enumerate(local_file):
            bytes_count += len(x)
            if (i + 1) % 10000 == 0:
                ds_parse.update_progress(bytes_count, tot_bytes, local_prefix)
            if 'status_code:200' in x:
                if '/rank/' in x and '"eventId":"' in x:
                    ei = ds_parse.local_rank(x)
                    len_local_rank += 1
                    if ei in gt:
                        dup_rank += 1
                    else:
                        gt[ei] = {'i': len_local_rank}
                elif '/reward/' in x and 'content:' in x:
                    ei, r = ds_parse.local_reward(x)
                    local_rew.append((ei, r))
                    # Rewards without a preceding rank line used to raise
                    # KeyError here; they are kept in local_rew only, which
                    # the intersection count in the summary makes visible.
                    if ei in gt:
                        gt[ei].setdefault('local_rew', []).append(r)
                else:
                    lines_errs += 1
            else:
                err_codes.update([ds_parse.extract_field(x, 'status_code:', '\t')])
    ds_parse.update_progress(tot_bytes, tot_bytes, local_prefix)

    print('\n\nLoading Azure files...')
    if os.path.isdir(azure_path):
        files = [azure_fp.path for azure_fp in scantree(azure_path) if azure_fp.name.endswith('.json')]
    else:
        files = [azure_path]

    verbose_output = []

    ei_miss_local = 0
    azure_data = []
    for ii, azure_fp in enumerate(files):
        is_gz = azure_fp.endswith('.gz')
        file_prefix = 'File {}/{}: {} - '.format(ii + 1, len(files), azure_fp)
        bytes_count = 0
        tot_bytes = os.path.getsize(azure_fp)
        # Kept outside the loop so the final progress call is well defined
        # even for an empty file.
        line_count = 0
        opener = gzip.open if is_gz else open
        with opener(azure_fp, 'rb') as azure_file:
            for line_count, x in enumerate(azure_file, 1):
                bytes_count += len(x)
                if line_count % 10000 == 0:
                    # For .gz files the on-disk size does not match the bytes
                    # read, so progress is reported as a line count instead.
                    if is_gz:
                        ds_parse.update_progress(line_count, prefix=file_prefix)
                    else:
                        ds_parse.update_progress(bytes_count, tot_bytes, file_prefix)

                if x.startswith(b'{"_label_cost":'):
                    data = ds_parse.json_cooked(x)
                    ei = str(data['ei'], 'utf-8')
                    c = str(data['cost'], 'utf-8')
                    azure_data.append((ei, c))
                    if ei not in gt:
                        ei_miss_local += 1
                        if verbose:
                            verbose_output.append(
                                'Idx: {} - EventId: {} - Ranking missing from Local'.format(len(azure_data), ei))
                    else:
                        gt[ei].setdefault('azure_data', []).append((c, data['ts']))
        if is_gz:
            ds_parse.update_progress(line_count, prefix=file_prefix)
        else:
            ds_parse.update_progress(bytes_count, tot_bytes, file_prefix)
        print()
    print()

    dup_azure_counter = collections.Counter()
    dup_rew_counter = collections.Counter()
    err_rewards_idx = []
    no_events_idx = []
    no_rewards_idx = []
    for i, (ei, entry) in enumerate(gt.items()):
        if (i + 1) % 10000 == 0:
            ds_parse.update_progress(i + 1, len(gt), 'Evaluating differences - ')
        idx = entry['i']

        if 'local_rew' not in entry:
            no_rewards_idx.append(idx)
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Reward missing from local'.format(idx, ei))
            continue

        rewards = entry['local_rew']
        if len(rewards) > 1:
            dup_rew_counter.update([len(rewards)])
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Duplicate in Reward: {}'.format(idx, ei, rewards))
            continue

        if 'azure_data' not in entry:
            no_events_idx.append(idx)
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Ranking missing from Azure'.format(idx, ei))
            continue

        azure_rows = entry['azure_data']
        if len(azure_rows) > 1:
            dup_azure_counter.update([len(azure_rows)])
            if verbose:
                verbose_output.append('Idx: {} - EventId: {} - Duplicate in Azure: {}'.format(idx, ei, azure_rows))
            continue

        # The event is flagged when reward + cost is not ~0, using a relative
        # tolerance with an absolute floor.
        a = float(rewards[0])
        b = float(azure_rows[0][0])
        if abs(a + b) > max(1e-7 * max(abs(a), abs(b)), 1e-6):
            err_rewards_idx.append(idx)
            if verbose:
                verbose_output.append(
                    'Idx: {} - EventId: {} - Error in reward: Local: {} Azure: {}'.format(
                        idx, ei, rewards[0], azure_rows[0]))
    # Report completion from len(gt) rather than the loop variable, which is
    # undefined/stale when gt is empty.
    ds_parse.update_progress(len(gt), len(gt), 'Evaluating differences - ')
    print()

    for x in verbose_output:
        print(x)

    print('\nComputing summary stats...')
    rew_dict = {y[0]: y[1] for y in local_rew}
    azure_dict = {y[0]: y[1] for y in azure_data}

    # An event seen k times contributes k - 1 duplicates.
    dup_azure = sum((x - 1) * dup_azure_counter[x] for x in dup_azure_counter)
    dup_rew = sum((x - 1) * dup_rew_counter[x] for x in dup_rew_counter)
    if verbose:
        print('-----' * 10)
        print('Missing events indexes (1-based indexing)\n{}'.format(no_events_idx))
        print('-----' * 10)
        print('Missing local rewards indexes (1-based indexing)\n{}'.format(no_rewards_idx))
        print('-----' * 10)
        print('Wrong rewards indexes (1-based indexing)\n{}'.format(err_rewards_idx))
    print('-----' * 10)
    print('Events in local_rank: {} (Duplicates: {})'.format(len_local_rank, dup_rank))
    print('Events in local_rew: {} (Duplicates: {} - {})'.format(len(local_rew), dup_rew, dup_rew_counter))
    print('Events in azure_data: {} (Duplicates: {} - {})'.format(len(azure_data), dup_azure, dup_azure_counter))
    print('-----' * 10)
    print('Intersection local_rank/local_rew:', sum(1 for x in rew_dict if x in gt))
    print('Intersection local_rank/azure_data:', sum(1 for x in azure_dict if x in gt))
    print('Missing EventIds from local: {}'.format(ei_miss_local))
    print('Missing EventIds from azure: {}'.format(len(no_events_idx)), end='')
    if no_events_idx:
        print(' (oldest 1-base index: {}/{})'.format(min(no_events_idx), len_local_rank), end='')
    print('\nMissing Local Rewards: {}'.format(len(no_rewards_idx)), end='')
    if no_rewards_idx:
        print(' (oldest 1-base index: {}/{})'.format(min(no_rewards_idx), len_local_rank), end='')
    print('\nWrong rewards: {}'.format(len(err_rewards_idx)))
    print('-----' * 10)
    print('status_codes errors: {}'.format(err_codes.most_common()))
    print('Lines skipped in Local file: {}'.format(lines_errs))
    print('-----' * 10)
    print('Elapsed time: ', time.time() - t)

    if plot_hist:
        if err_rewards_idx or no_events_idx or no_rewards_idx:
            plt.rcParams.update({'font.size': 16})  # General font size
            if err_rewards_idx:
                hist_err = plt.hist(err_rewards_idx, hist_bin, label='Wrong reward', color='xkcd:orange')
                if verbose:
                    print('err_rewards_idx', hist_err)
            if no_events_idx:
                hist_events = plt.hist(no_events_idx, hist_bin, label='No rank', color='xkcd:blue')
                if verbose:
                    print('no_events_idx', hist_events)
            if no_rewards_idx:
                hist_rewards = plt.hist(no_rewards_idx, hist_bin, label='No local reward', color='xkcd:red')
                if verbose:
                    print('no_rewards_idx', hist_rewards)
            plt.title('Missing/Wrong rank and reward requests', fontsize=20)
            plt.xlabel('Request index', fontsize=18)
            plt.ylabel('Bin Count', fontsize=18)
            plt.legend()
            plt.show()
        else:
            print('Nothing to plot! All is good!')