def processAnswer(self, exp_uid, args_json):
    try:
        args_dict = self.helper.convert_json(args_json)
        args_dict = verifier.verify(args_dict,
                                    self.reference_dict['processAnswer']['args'])

        # Update timing info in query
        query = self.butler.queries.get(uid=args_dict['args']['query_uid'])
        timestamp_answer_received = args_dict['args'].get('timestamp_answer_received', None)
        delta_datetime = utils.str2datetime(timestamp_answer_received) - \
            utils.str2datetime(query['timestamp_query_generated'])
        round_trip_time = delta_datetime.total_seconds()
        response_time = float(args_dict['args'].get('response_time', 0.))

        query_update = self.call_app_fn(query['alg_label'], query['alg_id'],
                                        'processAnswer', args_dict)
        query_update.update({'response_time': response_time,
                             'network_delay': round_trip_time - response_time,
                             'timestamp_answer_received': timestamp_answer_received})
        self.butler.queries.set_many(uid=args_dict['args']['query_uid'],
                                     key_value_dict=query_update)

        return json.dumps({'args': {},
                           'meta': {'log_entry_durations': self.log_entry_durations}}), True, ''
    except Exception, error:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        full_error = str(traceback.format_exc()) + '\n' + str(error)
        utils.debug_print("processAnswer Exception: " + full_error, color='red')
        log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer',
                     'error': full_error, 'timestamp': utils.datetimeNow(),
                     'args_json': args_json}
        self.butler.ell.log(self.app_id + ':APP-EXCEPTION', log_entry)
        traceback.print_tb(exc_traceback)
        raise Exception(error)
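# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the app code): how the timing bookkeeping
# above decomposes a query's life cycle. `network_delay` is whatever remains
# of the measured round trip after the participant's self-reported
# `response_time` is subtracted. The timestamp format below is an assumption
# for the sake of a self-contained example; the real code parses with
# utils.str2datetime.
from datetime import datetime

def example_timing_breakdown(generated, received, response_time):
    fmt = '%Y-%m-%d %H:%M:%S.%f'  # assumed serialization format
    round_trip = (datetime.strptime(received, fmt)
                  - datetime.strptime(generated, fmt)).total_seconds()
    return {'response_time': response_time,
            'network_delay': round_trip - response_time}

# example_timing_breakdown('2016-01-01 12:00:00.000000',
#                          '2016-01-01 12:00:03.500000', 3.1)
# -> {'response_time': 3.1, 'network_delay': ~0.4}
# ---------------------------------------------------------------------------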
def api_activity_histogram(self, app, butler):
    """
    Description: returns the data to plot all API activity (for all
    algorithms) in a histogram with respect to time for any task in
    {getQuery, processAnswer, predict}

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    queries = butler.queries.get(pattern={'exp_uid': app.exp_uid})
    start_date = utils.str2datetime(butler.admin.get(uid=app.exp_uid)['start_date'])
    numerical_timestamps = [(utils.str2datetime(item['timestamp_query_generated'])
                             - start_date).total_seconds()
                            for item in queries]
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#FFFFFF'), figsize=(12, 1.5))
    ax.hist(numerical_timestamps,
            min(int(1 + 4 * numpy.sqrt(len(numerical_timestamps))), 300),
            alpha=0.5, color='black')
    ax.set_frame_on(False)
    ax.get_xaxis().set_ticks([])
    ax.get_yaxis().set_ticks([])
    ax.get_yaxis().set_visible(False)
    ax.set_xlim(0, max(numerical_timestamps))
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
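# ---------------------------------------------------------------------------
# Illustrative sketch: the bin-count rule used in the histogram above grows
# with the square root of the number of samples and is capped at 300 so a
# very active experiment cannot produce an unreadably dense plot.
import numpy

def example_num_bins(n):
    return min(int(1 + 4 * numpy.sqrt(n)), 300)

# example_num_bins(100) -> 41, example_num_bins(10000) -> 300
# ---------------------------------------------------------------------------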
def experiment_dashboard(exp_uid, app_id, exp_key):
    """
    Endpoint that renders the experiment dashboard.

    Inputs: ::\n
        (string) exp_uid, exp_uid for a current experiment.
    """
    # Not a particularly good way to do this.
    alg_label_list = rm.get_algs_for_exp_uid(exp_uid)
    exp_start_data = rm.get_app_exp_uid_start_date(exp_uid) + ' UTC'
    participant_uids = rm.get_participant_uids(exp_uid)
    num_participants = len(participant_uids)

    last_datetime = None
    num_queries = 0
    for participant_uid in participant_uids:
        queries = rm.get_participant_data(participant_uid, exp_uid)
        num_queries += len(queries)
        this_datetime = utils.str2datetime(queries[-1]['timestamp_query_generated'])
        if last_datetime is None or this_datetime > last_datetime:
            last_datetime = this_datetime
    last_activity = utils.datetime2str(last_datetime) + ' UTC'

    # Migrate this code to use keychain
    docs, didSucceed, message = db.getDocsByPattern(
        'next_frontend_base', 'keys',
        {'object_id': exp_uid, 'type': 'perm'})
    perm_key = docs[0]['_id']

    alg_list = [{'alg_label': alg['alg_label'],
                 'alg_label_clean': '_'.join(alg['alg_label'].split())}
                for alg in alg_label_list]

    if constants.NEXT_BACKEND_GLOBAL_HOST and constants.NEXT_BACKEND_GLOBAL_PORT:
        host_url = 'http://{}:{}'.format(constants.NEXT_BACKEND_GLOBAL_HOST,
                                         constants.NEXT_BACKEND_GLOBAL_PORT)
    else:
        host_url = ''
    print 'host_url', host_url
    print constants.NEXT_BACKEND_GLOBAL_HOST, constants.NEXT_BACKEND_GLOBAL_PORT

    env = Environment(loader=ChoiceLoader([
        PackageLoader('next.apps.{}'.format(app_id), 'dashboard'),
        PackageLoader('next.dashboard', 'templates')
    ]))
    template = env.get_template('{}.html'.format(app_id))
    return template.render(app_id=app_id,
                           exp_uid=exp_uid,
                           alg_list=alg_list,
                           host_url=host_url,
                           perm_key=perm_key,
                           url_for=url_for,
                           exp_start_data=exp_start_data,
                           num_participants=num_participants,
                           num_queries=num_queries,
                           last_activity=last_activity)
def get_app_exp_uid_start_date(self, exp_uid):
    """
    Returns the date when the experiment was initialized.

    Inputs: ::\n
        (string) exp_uid : unique experiment identifier

    Outputs: ::\n
        (datetime) start_date : start date in datetime format

    Usage: ::\n
        rm.get_app_exp_uid_start_date(exp_uid)
    """
    start_date = db.get('experiments_admin', exp_uid, 'start_date')
    if isinstance(start_date, datetime):
        return start_date
    else:
        return utils.str2datetime(start_date)
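# ---------------------------------------------------------------------------
# Illustrative sketch: the isinstance() check above lets the accessor accept
# either storage convention -- a datetime object or its string serialization.
# A minimal stand-in (the format string is an assumption; the real code
# delegates parsing to utils.str2datetime):
from datetime import datetime

def example_coerce_start_date(value):
    if isinstance(value, datetime):
        return value
    return datetime.strptime(value, '%Y-%m-%d %H:%M:%S.%f')
# ---------------------------------------------------------------------------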
def test_error_multiline_plot(self, app, butler):
    """
    Description: Returns a multiline plot with a one-to-one mapping between
    lines and algorithms; each line shows the error on the validation set
    with respect to the number of reported answers.

    Expected input:
      None

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    # get list of algorithms associated with project
    args = butler.experiment.get(key='args')
    test_alg_label = args['alg_list'][0]['test_alg_label']
    test_S = butler.queries.get(pattern={'exp_uid': app.exp_uid,
                                         'alg_label': test_alg_label})

    x_min = numpy.float('inf')
    x_max = -numpy.float('inf')
    y_min = numpy.float('inf')
    y_max = -numpy.float('inf')
    list_of_alg_dicts = []

    for algorithm in args['alg_list']:
        alg_label = algorithm['alg_label']
        list_of_log_dict, didSucceed, message = butler.ell.get_logs_with_filter(
            app.app_id + ':ALG-EVALUATION',
            {'exp_uid': app.exp_uid, 'alg_label': alg_label})
        list_of_log_dict = sorted(list_of_log_dict,
                                  key=lambda item: utils.str2datetime(item['timestamp']))

        x = []
        y = []
        for item in list_of_log_dict:
            num_reported_answers = item['num_reported_answers']
            Xd = item['X']
            err = 0.5
            if len(test_S) > 0:
                # compute error rate
                number_correct = 0.
                for query in test_S:
                    if 'q' in query:
                        i, j, k = query['q']
                        score = (numpy.dot(Xd[j], Xd[j])
                                 - 2 * numpy.dot(Xd[j], Xd[k])
                                 + 2 * numpy.dot(Xd[i], Xd[k])
                                 - numpy.dot(Xd[i], Xd[i]))
                        if score > 0:
                            number_correct += 1.0
                accuracy = number_correct / len(test_S)
                err = 1.0 - accuracy
            x.append(num_reported_answers)
            y.append(err)
        alg_dict = {'legend_label': alg_label, 'x': x, 'y': y}
        try:
            x_min = min(x_min, min(x))
            x_max = max(x_max, max(x))
            y_min = min(y_min, min(y))
            y_max = max(y_max, max(y))
        except:
            pass
        list_of_alg_dicts.append(alg_dict)

    import matplotlib.pyplot as plt
    import mpld3
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    for alg_dict in list_of_alg_dicts:
        ax.plot(alg_dict['x'], alg_dict['y'], label=alg_dict['legend_label'])
    ax.set_xlabel('Number of answered triplets')
    ax.set_ylabel('Error on hold-out set')
    ax.set_xlim([x_min, x_max])
    ax.set_ylim([y_min, y_max])
    ax.grid(color='white', linestyle='solid')
    ax.set_title('Triplet Test Error', size=14)
    legend = ax.legend(loc=2, ncol=3, mode="expand")
    for label in legend.get_texts():
        label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
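# ---------------------------------------------------------------------------
# Illustrative sketch: the score computed in the plot above is the identity
#   score = ||X[j] - X[k]||^2 - ||X[i] - X[k]||^2
# for a hold-out triplet q = [i, j, k] = [winner, loser, center], so
# score > 0 exactly when the embedding places the answered winner closer to
# the center than the loser, i.e. the triplet is predicted correctly.
import numpy

def example_triplet_score(X, i, j, k):
    X = numpy.asarray(X)
    return (numpy.dot(X[j], X[j]) - 2 * numpy.dot(X[j], X[k])
            + 2 * numpy.dot(X[i], X[k]) - numpy.dot(X[i], X[i]))

# equivalent form:
#   numpy.linalg.norm(X[j] - X[k])**2 - numpy.linalg.norm(X[i] - X[k])**2
# ---------------------------------------------------------------------------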
def reportAnswer(self, exp_uid, args_json, db, ell):
    """
    Reports back the reward of pulling the arm suggested by getQuery.

    Expected input:
      (str) query_uid : unique identifier of query
      (int) index_winner : index of arm, must be one of the indices given by getQuery

    Expected output (comma separated):
      if error:
        return (JSON) '{}', (bool) False, (str) error
      else:
        return (JSON) '{}', (bool) True, ''

    Usage:
      reportAnswer_args_json, didSucceed, message = app.reportAnswer(exp_uid, reportAnswer_args_json)

    Example input:
      reportAnswer_args_json = {"query_uid": "4d02a9924f92138287edd17ca5feb6e1", "index_winner": 8}

    Example output:
      reportAnswer_response_json = {}
    """
    try:
        app_id = self.app_id

        log_entry = {'exp_uid': exp_uid, 'task': 'reportAnswer',
                     'json': args_json, 'timestamp': utils.datetimeNow()}
        ell.log(app_id + ':APP-CALL', log_entry)

        # convert args_json to args_dict
        try:
            args_dict = json.loads(args_json)
        except:
            error = "%s.reportAnswer input args_json is in improper format" % self.app_id
            return '{}', False, error

        # check for the fields that must be contained in args or error occurs
        necessary_fields = ['index_winner', 'query_uid']
        for field in necessary_fields:
            try:
                args_dict[field]
            except KeyError:
                error = "%s.reportAnswer input arguments missing field: %s" % (self.app_id, str(field))
                return '{}', False, error

        # get list of algorithms associated with project
        alg_list, didSucceed, message = db.get(app_id + ':experiments', exp_uid, 'alg_list')

        # get alg_id
        query_uid = args_dict['query_uid']
        alg_uid, didSucceed, message = db.get(app_id + ':queries', query_uid, 'alg_uid')
        for algorithm in alg_list:
            if alg_uid == algorithm['alg_uid']:
                alg_id = algorithm['alg_id']
                alg_label = algorithm['alg_label']
                num_reported_answers, didSucceed, message = db.increment(
                    app_id + ':experiments', exp_uid,
                    'num_reported_answers_for_' + alg_uid)

        # get sandboxed database for the specific app_id, alg_id, exp_uid --
        # closing off the rest of the database to the algorithm
        rc = ResourceClient(app_id, exp_uid, alg_uid, db)

        # get specific algorithm to make calls to
        alg = utils.get_app_alg(self.app_id, alg_id)

        # get targets associated with the specific query
        targets, didSucceed, message = db.get(app_id + ':queries', query_uid, 'target_indices')
        target_indices = []
        for target in targets:
            target_indices.append(target['index'])

        # get the index winner
        index_winner = args_dict['index_winner']

        # update query doc
        timestamp_query_generated, didSucceed, message = db.get(
            app_id + ':queries', query_uid, 'timestamp_query_generated')
        datetime_query_generated = utils.str2datetime(timestamp_query_generated)
        timestamp_answer_received = args_dict.get('meta', {}).get('timestamp_answer_received', None)
        if timestamp_answer_received is None:
            datetime_answer_received = datetime_query_generated
        else:
            datetime_answer_received = utils.str2datetime(timestamp_answer_received)
        delta_datetime = datetime_answer_received - datetime_query_generated
        # total_seconds() folds the days component back in; delta.seconds alone
        # would silently drop any round trip longer than a day
        round_trip_time = delta_datetime.total_seconds()
        response_time = float(args_dict.get('response_time', 0.))
        db.set(app_id + ':queries', query_uid, 'response_time', response_time)
        db.set(app_id + ':queries', query_uid, 'network_delay', round_trip_time - response_time)
        db.set(app_id + ':queries', query_uid, 'index_winner', index_winner)

        # call reportAnswer
        didSucceed, dt = utils.timeit(alg.reportAnswer)(resource=rc,
                                                        targets=target_indices,
                                                        index_winner=index_winner)

        log_entry_durations = {'exp_uid': exp_uid, 'alg_uid': alg_uid,
                               'task': 'reportAnswer', 'duration': dt}
        log_entry_durations.update(rc.getDurations())
        meta = {'log_entry_durations': log_entry_durations}

        # calling predict
        ###############
        predict_id = 'arm_ranking'
        params = {'alg_label': alg_label}
        predict_args_dict = {'predict_id': predict_id, 'params': params}
        predict_args_json = json.dumps(predict_args_dict)
        db.submit_job(app_id, exp_uid, 'predict', predict_args_json, ignore_result=True)
        ###############

        response_args_dict = {}
        args_out = {'args': response_args_dict, 'meta': meta}
        response_json = json.dumps(args_out)

        log_entry = {'exp_uid': exp_uid, 'task': 'reportAnswer',
                     'json': response_json, 'timestamp': utils.datetimeNow()}
        ell.log(app_id + ':APP-RESPONSE', log_entry)

        return response_json, True, ""
    except Exception, err:
        error = traceback.format_exc()
        log_entry = {'exp_uid': exp_uid, 'task': 'reportAnswer',
                     'error': error, 'timestamp': utils.datetimeNow(),
                     'args_json': args_json}
        ell.log(app_id + ':APP-EXCEPTION', log_entry)
        return '{}', False, error
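# ---------------------------------------------------------------------------
# Illustrative sketch of the pitfall avoided above: timedelta.seconds is only
# the seconds *component* of the delta (the days field is ignored), so
# summing seconds + microseconds under-counts any round trip that spans a
# day boundary. total_seconds() folds the days back in.
from datetime import timedelta

d = timedelta(days=1, seconds=2, microseconds=500000)
assert d.seconds + d.microseconds / 1000000. == 2.5      # days dropped
assert d.total_seconds() == 86402.5                      # correct
# ---------------------------------------------------------------------------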
def compute_duration_multiline_plot(self, app, butler, task):
    """
    Description: Returns a multiline plot with a one-to-one mapping between
    lines and algorithms; each line shows the duration to complete the task
    (with respect to the API call).

    Expected input:
      (string) task : must be in {'getQuery','processAnswer','predict'}

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    alg_list = butler.experiment.get(key='args')['alg_list']
    x_min = numpy.float('inf')
    x_max = -numpy.float('inf')
    y_min = numpy.float('inf')
    y_max = -numpy.float('inf')
    list_of_alg_dicts = []

    for algorithm in alg_list:
        alg_label = algorithm['alg_label']
        list_of_log_dict, didSucceed, message = butler.ell.get_logs_with_filter(
            app.app_id + ':ALG-DURATION',
            {'exp_uid': app.exp_uid, 'alg_label': alg_label, 'task': task})
        list_of_log_dict = sorted(list_of_log_dict,
                                  key=lambda item: utils.str2datetime(item['timestamp']))

        x = []
        y = []
        t = []
        k = 0
        for item in list_of_log_dict:
            k += 1
            x.append(k)
            y.append(item.get('app_duration', 0.) + item.get('duration_enqueued', 0.))
            t.append(str(item['timestamp'])[:-3])

        x = numpy.array(x)
        y = numpy.array(y)
        t = numpy.array(t)
        num_items = len(list_of_log_dict)
        multiplier = min(num_items, MAX_SAMPLES_PER_PLOT)
        incr_inds = [r * num_items / multiplier for r in range(multiplier)]
        max_inds = list(numpy.argsort(-y)[0:multiplier])
        final_inds = sorted(set(incr_inds + max_inds))
        x = list(x[final_inds])
        y = list(y[final_inds])
        t = list(t[final_inds])

        alg_dict = {'legend_label': alg_label, 'x': x, 'y': y, 't': t}
        try:
            x_min = min(x_min, min(x))
            x_max = max(x_max, max(x))
            y_min = min(y_min, min(y))
            y_max = max(y_max, max(y))
        except:
            pass
        list_of_alg_dicts.append(alg_dict)

    return_dict = {}
    return_dict['data'] = list_of_alg_dicts
    return_dict['plot_type'] = 'multi_line_plot'
    return_dict['x_label'] = 'API Call'
    return_dict['x_min'] = x_min
    return_dict['x_max'] = x_max
    return_dict['y_label'] = 'Duration (s)'
    return_dict['y_min'] = y_min
    return_dict['y_max'] = y_max

    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    for alg_dict in list_of_alg_dicts:
        ax.plot(alg_dict['x'], alg_dict['y'], label=alg_dict['legend_label'])
    ax.set_xlabel('API Call')
    ax.set_ylabel('Duration (s)')
    ax.set_xlim([x_min, x_max])
    ax.set_ylim([y_min, y_max])
    ax.grid(color='white', linestyle='solid')
    ax.set_title(task, size=14)
    legend = ax.legend(loc=2, ncol=3, mode="expand")
    for label in legend.get_texts():
        label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
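# ---------------------------------------------------------------------------
# Illustrative sketch: the downsampling above merges two index sets -- an
# evenly spaced sweep (so the plotted line keeps its overall shape) and the
# indices of the largest durations (so latency spikes survive thinning).
# MAX_SAMPLES_PER_PLOT is the per-line budget.
import numpy

def example_subsample_indices(y, budget):
    n = len(y)
    m = min(n, budget)
    if m == 0:
        return []
    incr_inds = [r * n // m for r in range(m)]             # even coverage
    max_inds = list(numpy.argsort(-numpy.asarray(y))[:m])  # largest values
    return sorted(set(incr_inds + max_inds))
# ---------------------------------------------------------------------------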
def compute_duration_detailed_stacked_area_plot(self, app, butler, task, alg_label, detailedDB=False):
    """
    Description: Returns a stacked area plot for a particular algorithm and
    task where the durations are broken down into compute, db_set, db_get
    (for cpu, database_set, database_get).

    Expected input:
      (string) task : must be in {'getQuery','processAnswer','predict'}
      (string) alg_label : must be a valid alg_label contained in alg_list list of dicts

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    list_of_log_dict = butler.ell.get_logs_with_filter(
        app.app_id + ':ALG-DURATION',
        {'exp_uid': app.exp_uid, 'alg_label': alg_label, 'task': task})
    list_of_log_dict = sorted(list_of_log_dict,
                              key=lambda item: utils.str2datetime(item['timestamp']))

    y = []
    for item in list_of_log_dict:
        y.append(item.get('app_duration', 0.) + item.get('duration_enqueued', 0.))
    y = numpy.array(y)
    num_items = len(list_of_log_dict)
    multiplier = min(num_items, MAX_SAMPLES_PER_PLOT)
    incr_inds = [k * num_items / multiplier for k in range(multiplier)]
    max_inds = list(numpy.argsort(-y)[0:multiplier])
    final_inds = sorted(set(incr_inds + max_inds))

    x = []
    t = []
    enqueued = []
    admin = []
    dbGet = []
    dbSet = []
    compute = []
    max_y_value = 0.
    min_y_value = float('inf')
    for idx in final_inds:
        item = list_of_log_dict[idx]
        x.append(idx + 1)
        t.append(str(item.get('timestamp', '')))

        _alg_duration = item.get('duration', 0.)
        _alg_duration_dbGet = item.get('duration_dbGet', 0.)
        _alg_duration_dbSet = item.get('duration_dbSet', 0.)
        _duration_enqueued = item.get('duration_enqueued', 0.)
        _app_duration = item.get('app_duration', 0.)

        if (_app_duration + _duration_enqueued) > max_y_value:
            max_y_value = _app_duration + _duration_enqueued
        if (_app_duration + _duration_enqueued) < min_y_value:
            min_y_value = _app_duration + _duration_enqueued

        enqueued.append(_duration_enqueued)
        admin.append(_app_duration - _alg_duration)
        dbSet.append(_alg_duration_dbSet)
        dbGet.append(_alg_duration_dbGet)
        compute.append(_alg_duration - _alg_duration_dbSet - _alg_duration_dbGet)

    try:
        min_x = min(x)
        max_x = max(x)
    except:
        min_x = 0.
        max_x = 0.

    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    stack_coll = ax.stackplot(x, compute, dbGet, dbSet, admin, enqueued, alpha=.5)
    ax.set_xlabel('API Call')
    ax.set_ylabel('Duration (s)')
    ax.set_xlim([min_x, max_x])
    ax.set_ylim([0., max_y_value])
    ax.grid(color='white', linestyle='solid')
    ax.set_title(alg_label + ' - ' + task, size=14)
    proxy_rects = [plt.Rectangle((0, 0), 1, 1, alpha=.5, fc=pc.get_facecolor()[0])
                   for pc in stack_coll]
    legend = ax.legend(proxy_rects,
                       ['compute', 'dbGet', 'dbSet', 'admin', 'enqueued'],
                       loc=2, ncol=3, mode="expand")
    for label in legend.get_texts():
        label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
def processAnswer(self, exp_uid, args_json, db, ell):
    """
    Reports back the reward of pulling the arm suggested by getQuery.

    Expected input (in json structure with string keys):
      (int) index_winner : index of the winner (must be the index of the left
                           or right target in target_indices)
      (str) query_uid : unique identifier of query

    Expected output (comma separated):
      if error:
        return (JSON) '{}', (bool) False, (str) error
      else:
        return (JSON) '{}', (bool) True, ''
    """
    try:
        app_id = self.app_id

        log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer',
                     'json': args_json, 'timestamp': utils.datetimeNow()}
        ell.log(app_id + ':APP-CALL', log_entry)

        # convert args_json to args_dict
        try:
            args_dict = json.loads(args_json)
        except:
            error = "%s.processAnswer input args_json is in improper format" % self.app_id
            return '{}', False, error

        # check for the fields that must be contained in args or error occurs
        necessary_fields = ['index_winner', 'query_uid']
        for field in necessary_fields:
            try:
                args_dict[field]
            except KeyError:
                error = "%s.processAnswer input arguments missing field: %s" % (self.app_id, str(field))
                return '{}', False, error

        # get list of algorithms associated with project
        alg_list, didSucceed, message = db.get(app_id + ':experiments', exp_uid, 'alg_list')

        # get alg_id
        query_uid = args_dict['query_uid']
        alg_uid, didSucceed, message = db.get(app_id + ':queries', query_uid, 'alg_uid')
        if not didSucceed:
            raise Exception("Failed to retrieve query with query_uid=" + query_uid)
        for algorithm in alg_list:
            if alg_uid == algorithm['alg_uid']:
                alg_id = algorithm['alg_id']
                alg_label = algorithm['alg_label']
                test_alg_label = algorithm['test_alg_label']
                num_reported_answers, didSucceed, message = db.increment(
                    app_id + ':experiments', exp_uid,
                    'num_reported_answers_for_' + alg_uid)

        # get sandboxed database for the specific app_id, alg_id, exp_uid --
        # closing off the rest of the database to the algorithm
        rc = ResourceClient(app_id, exp_uid, alg_uid, db)

        # get specific algorithm to make calls to
        alg = utils.get_app_alg(self.app_id, alg_id)

        targets, didSucceed, message = db.get(app_id + ':queries', query_uid, 'target_indices')
        for target in targets:
            if target['label'] == 'center':
                index_center = target['index']
            elif target['label'] == 'left':
                index_left = target['index']
            elif target['label'] == 'right':
                index_right = target['index']

        index_winner = args_dict['index_winner']

        # update query doc
        timestamp_query_generated, didSucceed, message = db.get(
            app_id + ':queries', query_uid, 'timestamp_query_generated')
        datetime_query_generated = utils.str2datetime(timestamp_query_generated)
        timestamp_answer_received = args_dict.get('meta', {}).get('timestamp_answer_received', None)
        if timestamp_answer_received is None:
            datetime_answer_received = datetime_query_generated
        else:
            datetime_answer_received = utils.str2datetime(timestamp_answer_received)
        delta_datetime = datetime_answer_received - datetime_query_generated
        # total_seconds() folds the days component back in; delta.seconds alone
        # would silently drop any round trip longer than a day
        round_trip_time = delta_datetime.total_seconds()
        response_time = float(args_dict.get('response_time', 0.))
        db.set(app_id + ':queries', query_uid, 'response_time', response_time)
        db.set(app_id + ':queries', query_uid, 'network_delay', round_trip_time - response_time)
        db.set(app_id + ':queries', query_uid, 'index_winner', index_winner)

        # store the triplet with the winner first: q = [winner, loser, center]
        q = [index_left, index_right, index_center]
        if index_winner == index_right:
            q = [index_right, index_left, index_center]
        db.set(app_id + ':queries', query_uid, 'q', q)

        # call processAnswer
        didSucceed, dt = utils.timeit(alg.processAnswer)(resource=rc,
                                                         index_center=index_center,
                                                         index_left=index_left,
                                                         index_right=index_right,
                                                         index_winner=index_winner)

        log_entry_durations = {'exp_uid': exp_uid, 'alg_uid': alg_uid,
                               'task': 'processAnswer', 'duration': dt}
        log_entry_durations.update(rc.getDurations())
        meta = {'log_entry_durations': log_entry_durations}

        # check if we're going to evaluate this loss
        n, didSucceed, message = db.get(app_id + ':experiments', exp_uid, 'n')
        if num_reported_answers % ((n + 4) / 4) == 0:
            predict_id = 'get_embedding'
            params = {'alg_label': alg_label}
            predict_args_dict = {'predict_id': predict_id, 'params': params}
            predict_args_json = json.dumps(predict_args_dict)
            db.submit_job(app_id, exp_uid, 'predict', predict_args_json, ignore_result=True)

        response_args_dict = {}
        args_out = {'args': response_args_dict, 'meta': meta}
        response_json = json.dumps(args_out)

        log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer',
                     'json': response_json, 'timestamp': utils.datetimeNow()}
        ell.log(app_id + ':APP-RESPONSE', log_entry)

        return response_json, True, ""
    except Exception, err:
        error = traceback.format_exc()
        log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer',
                     'error': error, 'timestamp': utils.datetimeNow()}
        ell.log(app_id + ':APP-EXCEPTION', log_entry)
        return '{}', False, error
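# ---------------------------------------------------------------------------
# Illustrative sketch: the hold-out triplet written above is order-coded as
# q = [winner, loser, center], so downstream consumers (e.g. the triplet
# test-error plot) can score an embedding without re-checking which side of
# the query was clicked.
def example_encode_triplet(index_left, index_right, index_center, index_winner):
    q = [index_left, index_right, index_center]
    if index_winner == index_right:
        q = [index_right, index_left, index_center]
    return q
# ---------------------------------------------------------------------------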
def test_error_multiline_plot(self, app, butler):
    """
    Description: Returns a multiline plot with a one-to-one mapping between
    lines and algorithms; each line shows the error on the validation set
    with respect to the number of reported answers.

    Expected input:
      None

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    args = butler.experiment.get(key='args')
    alg_list = args['alg_list']
    test_alg_label = alg_list[0]['test_alg_label']
    test_queries, didSucceed, message = butler.db.get_docs_with_filter(
        app.app_id + ':queries',
        {'exp_uid': app.exp_uid, 'alg_label': test_alg_label})

    x_min = numpy.float('inf')
    x_max = -numpy.float('inf')
    y_min = numpy.float('inf')
    y_max = -numpy.float('inf')
    list_of_alg_dicts = []

    for algorithm in alg_list:
        alg_label = algorithm['alg_label']
        list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
            app.app_id + ':ALG-EVALUATION',
            {'exp_uid': app.exp_uid, 'alg_label': alg_label})
        list_of_log_dict = sorted(list_of_log_dict,
                                  key=lambda item: utils.str2datetime(item['timestamp']))

        x = []
        y = []
        for item in list_of_log_dict:
            num_reported_answers = item['num_reported_answers']
            precision = item['precision']
            err = int(precision * 100)
            x.append(num_reported_answers)
            y.append(err)

        # sort both sequences by x; the permutation from argsort must be
        # applied to x and y alike to keep the pairs aligned
        order = numpy.argsort(x)
        x = [x[i] for i in order]
        y = [y[i] for i in order]

        alg_dict = {'legend_label': alg_label, 'x': x, 'y': y}
        try:
            x_min = min(x_min, min(x))
            x_max = max(x_max, max(x))
            y_min = min(y_min, min(y))
            y_max = max(y_max, max(y))
        except:
            pass
        list_of_alg_dicts.append(alg_dict)

    import matplotlib.pyplot as plt
    import mpld3
    width = 0.8
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    for alg_dict in list_of_alg_dicts:
        ax.plot(alg_dict['x'], alg_dict['y'], label=alg_dict['legend_label'])
    ax.set_xlabel('Number of held out examples (#)')
    ax.set_ylabel('Accuracy (%)')
    ax.set_xlim([x_min, x_max])
    ax.set_ylim([y_min - width, y_max + width])
    ax.grid(color='white', linestyle='solid')
    ax.set_title('Product Classification Accuracy on Held Out Examples (higher is better)', size=14)
    legend = ax.legend(loc=2, ncol=3, mode="expand")
    for label in legend.get_texts():
        label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
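# ---------------------------------------------------------------------------
# Illustrative sketch of the co-sorting fix applied above: numpy.argsort
# yields a permutation, and that permutation must index *both* sequences.
# Reusing the permuted x as its own index list (as the original code did)
# scrambles the pairing between x and y.
import numpy

def example_cosort(x, y):
    order = numpy.argsort(x)
    return [x[i] for i in order], [y[i] for i in order]

# example_cosort([30, 10, 20], ['c', 'a', 'b'])
# -> ([10, 20, 30], ['a', 'b', 'c'])
# ---------------------------------------------------------------------------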
def processAnswer(self, exp_uid, args_json, db, ell):
    """
    Reports back the reward of pulling the arm suggested by getQuery.

    Expected input (in json structure with string keys):
      (str) query_uid : unique identifier of query
      (int) index_winner : index of arm, must be in {index_left, index_right}

    Expected output (comma separated):
      if error:
        return (JSON) '{}', (bool) False, (str) error
      else:
        return (JSON) '{}', (bool) True, ''

    Usage:
      processAnswer_args_json, didSucceed, message = app.processAnswer(db_API, exp_uid, processAnswer_args_json)

    Example input:
      processAnswer_args_json = {"query_uid": "4d02a9924f92138287edd17ca5feb6e1", "index_winner": 8}

    Example output:
      processAnswer_response_json = {}
    """
    try:
        app_id = self.app_id

        log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer',
                     'json': args_json, 'timestamp': utils.datetimeNow()}
        ell.log(app_id + ':APP-CALL', log_entry)

        # convert args_json to args_dict
        try:
            args_dict = json.loads(args_json)
        except:
            error = "%s.processAnswer input args_json is in improper format" % self.app_id
            return '{}', False, error

        # check for the fields that must be contained in args or error occurs
        necessary_fields = ['index_winner', 'query_uid']
        for field in necessary_fields:
            try:
                args_dict[field]
            except KeyError:
                error = "%s.processAnswer input arguments missing field: %s" % (self.app_id, str(field))
                return '{}', False, error

        # get list of algorithms associated with project
        alg_list, didSucceed, message = db.get(app_id + ':experiments', exp_uid, 'alg_list')

        # get alg_id
        query_uid = args_dict['query_uid']
        alg_uid, didSucceed, message = db.get(app_id + ':queries', query_uid, 'alg_uid')
        for algorithm in alg_list:
            if alg_uid == algorithm['alg_uid']:
                alg_id = algorithm['alg_id']
                alg_label = algorithm['alg_label']
                num_reported_answers, didSucceed, message = db.increment(
                    app_id + ':experiments', exp_uid,
                    'num_reported_answers_for_' + alg_uid)

        # get sandboxed database for the specific app_id, alg_id, exp_uid --
        # closing off the rest of the database to the algorithm
        rc = ResourceClient(app_id, exp_uid, alg_uid, db)

        # get specific algorithm to make calls to
        alg = utils.get_app_alg(self.app_id, alg_id)

        targets, didSucceed, message = db.get(app_id + ':queries', query_uid, 'target_indices')
        for target in targets:
            if target['label'] == 'left':
                index_left = target['index']
            if target['label'] == 'right':
                index_right = target['index']
            if target['flag'] == 1:
                index_painted = target['index']

        index_winner = args_dict['index_winner']

        # update query doc
        timestamp_query_generated, didSucceed, message = db.get(
            app_id + ':queries', query_uid, 'timestamp_query_generated')
        datetime_query_generated = utils.str2datetime(timestamp_query_generated)
        timestamp_answer_received = args_dict.get('meta', {}).get('timestamp_answer_received', None)
        if timestamp_answer_received is None:
            datetime_answer_received = datetime_query_generated
        else:
            datetime_answer_received = utils.str2datetime(timestamp_answer_received)
        delta_datetime = datetime_answer_received - datetime_query_generated
        # total_seconds() folds the days component back in; delta.seconds alone
        # would silently drop any round trip longer than a day
        round_trip_time = delta_datetime.total_seconds()
        response_time = float(args_dict.get('response_time', 0.))
        db.set(app_id + ':queries', query_uid, 'response_time', response_time)
        db.set(app_id + ':queries', query_uid, 'network_delay', round_trip_time - response_time)
        db.set(app_id + ':queries', query_uid, 'index_winner', index_winner)

        # call processAnswer
        didSucceed, dt = utils.timeit(alg.processAnswer)(resource=rc,
                                                         index_left=index_left,
                                                         index_right=index_right,
                                                         index_painted=index_painted,
                                                         index_winner=index_winner)

        log_entry_durations = {'exp_uid': exp_uid, 'alg_uid': alg_uid,
                               'task': 'processAnswer', 'duration': dt}
        log_entry_durations.update(rc.getDurations())
        meta = {'log_entry_durations': log_entry_durations}

        ###############
        predict_id = 'arm_ranking'
        params = {'alg_label': alg_label}
        predict_args_dict = {'predict_id': predict_id, 'params': params}
        predict_args_json = json.dumps(predict_args_dict)
        db.submit_job(app_id, exp_uid, 'predict', predict_args_json, ignore_result=True)
        ###############

        response_args_dict = {}
        args_out = {'args': response_args_dict, 'meta': meta}
        response_json = json.dumps(args_out)

        log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer',
                     'json': response_json, 'timestamp': utils.datetimeNow()}
        ell.log(app_id + ':APP-RESPONSE', log_entry)

        return response_json, True, ""
    except Exception, err:
        error = traceback.format_exc()
        log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer',
                     'error': error, 'timestamp': utils.datetimeNow(),
                     'args_json': args_json}
        ell.log(app_id + ':APP-EXCEPTION', log_entry)
        return '{}', False, error
def test_error_multiline_plot(self, app, butler):
    """
    Description: Returns a multiline plot with a one-to-one mapping between
    lines and algorithms; each line shows the error on the validation set
    with respect to the number of reported answers.

    Expected input:
      None

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    args = butler.experiment.get(key='args')
    alg_list = args['alg_list']
    test_alg_label = alg_list[0]['test_alg_label']
    test_queries = butler.db.get_docs_with_filter(
        app.app_id + ':queries',
        {'exp_uid': app.exp_uid, 'alg_label': test_alg_label})
    test_S = [(query['target_index'], query['target_label'])
              for query in test_queries
              if 'target_index' in query.keys()]

    targets = butler.targets.get_targetset(app.exp_uid)
    targets = sorted(targets, key=lambda x: x['target_id'])
    target_features = []
    for target_index in range(len(targets)):
        target_vec = targets[target_index]['meta']['features']
        target_vec.append(1.)  # append a bias term
        target_features.append(target_vec)

    x_min = numpy.float('inf')
    x_max = -numpy.float('inf')
    y_min = numpy.float('inf')
    y_max = -numpy.float('inf')
    list_of_alg_dicts = []

    for algorithm in alg_list:
        alg_label = algorithm['alg_label']
        list_of_log_dict = self.ell.get_logs_with_filter(
            app.app_id + ':ALG-EVALUATION',
            {'exp_uid': app.exp_uid, 'alg_label': alg_label})
        list_of_log_dict = sorted(list_of_log_dict,
                                  key=lambda item: utils.str2datetime(item['timestamp']))

        x = []
        y = []
        for item in list_of_log_dict:
            num_reported_answers = item['num_reported_answers']
            weights = item['weights']
            err = 0.
            for q in test_S:
                estimated_label = numpy.sign(numpy.dot(
                    numpy.array(target_features[q[0]]),
                    numpy.array(weights)))
                err += estimated_label * q[1] < 0.  # do the labels agree or not
            m = float(len(test_S))
            err = err / m
            x.append(num_reported_answers)
            y.append(err)

        # sort both sequences by x; the permutation from argsort must be
        # applied to x and y alike to keep the pairs aligned
        order = numpy.argsort(x)
        x = [x[i] for i in order]
        y = [y[i] for i in order]

        alg_dict = {'legend_label': alg_label, 'x': x, 'y': y}
        try:
            x_min = min(x_min, min(x))
            x_max = max(x_max, max(x))
            y_min = min(y_min, min(y))
            y_max = max(y_max, max(y))
        except:
            pass
        list_of_alg_dicts.append(alg_dict)

    import matplotlib.pyplot as plt
    import mpld3
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    for alg_dict in list_of_alg_dicts:
        ax.plot(alg_dict['x'], alg_dict['y'], label=alg_dict['legend_label'])
    ax.set_xlabel('Number of answered queries')
    ax.set_ylabel('Error on hold-out set')
    ax.set_xlim([x_min, x_max])
    ax.set_ylim([y_min, y_max])
    ax.grid(color='white', linestyle='solid')
    ax.set_title('Test Error', size=14)
    legend = ax.legend(loc=2, ncol=3, mode="expand")
    for label in legend.get_texts():
        label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
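# ---------------------------------------------------------------------------
# Illustrative sketch: each ALG-EVALUATION log entry above carries a weight
# vector for a linear classifier over bias-augmented target features, and
# hold-out error is the fraction of labeled targets whose predicted sign
# disagrees with the stored label. Labels are assumed to be in {-1, +1}.
import numpy

def example_holdout_error(weights, target_features, labeled_pairs):
    # labeled_pairs: list of (target_index, label) tuples
    errors = 0.
    for idx, label in labeled_pairs:
        predicted = numpy.sign(numpy.dot(numpy.array(target_features[idx]),
                                         numpy.array(weights)))
        errors += predicted * label < 0.  # a sign disagreement is an error
    return errors / float(len(labeled_pairs))
# ---------------------------------------------------------------------------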
def compute_duration_detailed_stacked_area_plot(self, app, butler, task, alg_label, detailedDB=False):
    """
    Description: Returns a stacked area plot for a particular algorithm and
    task where the durations are broken down into compute, db_set, db_get
    (for cpu, database_set, database_get).

    Expected input:
      (string) task : must be in {'getQuery','processAnswer','predict'}
      (string) alg_label : must be a valid alg_label contained in alg_list list of dicts

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    list_of_log_dict, didSucceed, message = butler.ell.get_logs_with_filter(
        app.app_id + ':ALG-DURATION',
        {'exp_uid': app.exp_uid, 'alg_label': alg_label, 'task': task})
    list_of_log_dict = sorted(list_of_log_dict,
                              key=lambda item: utils.str2datetime(item['timestamp']))

    y = []
    for item in list_of_log_dict:
        y.append(item.get('app_duration', 0.) + item.get('duration_enqueued', 0.))
    y = numpy.array(y)
    num_items = len(list_of_log_dict)
    multiplier = min(num_items, MAX_SAMPLES_PER_PLOT)
    incr_inds = [k * num_items / multiplier for k in range(multiplier)]
    max_inds = list(numpy.argsort(-y)[0:multiplier])
    final_inds = sorted(set(incr_inds + max_inds))

    x = []
    t = []
    enqueued = []
    admin = []
    dbGet = []
    dbSet = []
    compute = []
    max_y_value = 0.
    min_y_value = float('inf')
    for idx in final_inds:
        item = list_of_log_dict[idx]
        x.append(idx + 1)
        t.append(str(item.get('timestamp', '')))

        _alg_duration = item.get('duration', 0.)
        _alg_duration_dbGet = item.get('duration_dbGet', 0.)
        _alg_duration_dbSet = item.get('duration_dbSet', 0.)
        _duration_enqueued = item.get('duration_enqueued', 0.)
        _app_duration = item.get('app_duration', 0.)

        if (_app_duration + _duration_enqueued) > max_y_value:
            max_y_value = _app_duration + _duration_enqueued
        if (_app_duration + _duration_enqueued) < min_y_value:
            min_y_value = _app_duration + _duration_enqueued

        enqueued.append(_duration_enqueued)
        admin.append(_app_duration - _alg_duration)
        dbSet.append(_alg_duration_dbSet)
        dbGet.append(_alg_duration_dbGet)
        compute.append(_alg_duration - _alg_duration_dbSet - _alg_duration_dbGet)

    try:
        min_x = min(x)
        max_x = max(x)
    except:
        min_x = 0.
        max_x = 0.

    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    stack_coll = ax.stackplot(x, compute, dbGet, dbSet, admin, enqueued, alpha=.5)
    ax.set_xlabel('API Call')
    ax.set_ylabel('Duration (s)')
    ax.set_xlim([min_x, max_x])
    ax.set_ylim([0., max_y_value])
    ax.grid(color='white', linestyle='solid')
    ax.set_title(alg_label + ' - ' + task, size=14)
    proxy_rects = [plt.Rectangle((0, 0), 1, 1, alpha=.5, fc=pc.get_facecolor()[0])
                   for pc in stack_coll]
    legend = ax.legend(proxy_rects,
                       ['compute', 'dbGet', 'dbSet', 'admin', 'enqueued'],
                       loc=2, ncol=3, mode="expand")
    for label in legend.get_texts():
        label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
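# ---------------------------------------------------------------------------
# Illustrative sketch: the five stacked bands above partition one API call's
# wall-clock time, and by construction they sum to
# app_duration + duration_enqueued (the total plotted height).
def example_duration_bands(item):
    alg = item.get('duration', 0.)
    db_get = item.get('duration_dbGet', 0.)
    db_set = item.get('duration_dbSet', 0.)
    return {
        'enqueued': item.get('duration_enqueued', 0.),  # waiting in the queue
        'admin': item.get('app_duration', 0.) - alg,    # app-level overhead
        'dbSet': db_set,                                # algorithm DB writes
        'dbGet': db_get,                                # algorithm DB reads
        'compute': alg - db_set - db_get,               # pure algorithm time
    }
# ---------------------------------------------------------------------------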
def compute_duration_multiline_plot(self, app, butler, task):
    """
    Description: Returns a multiline plot with a one-to-one mapping between
    lines and algorithms; each line shows the duration to complete the task
    (with respect to the API call).

    Expected input:
      (string) task : must be in {'getQuery','processAnswer','predict'}

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    alg_list = butler.experiment.get(key='args')['alg_list']
    x_min = numpy.float('inf')
    x_max = -numpy.float('inf')
    y_min = numpy.float('inf')
    y_max = -numpy.float('inf')
    list_of_alg_dicts = []

    for algorithm in alg_list:
        alg_label = algorithm['alg_label']
        list_of_log_dict = butler.ell.get_logs_with_filter(
            app.app_id + ':ALG-DURATION',
            {'exp_uid': app.exp_uid, 'alg_label': alg_label, 'task': task})
        list_of_log_dict = sorted(list_of_log_dict,
                                  key=lambda item: utils.str2datetime(item['timestamp']))

        x = []
        y = []
        t = []
        k = 0
        for item in list_of_log_dict:
            k += 1
            x.append(k)
            y.append(item.get('app_duration', 0.) + item.get('duration_enqueued', 0.))
            t.append(str(item['timestamp'])[:-3])

        x = numpy.array(x)
        y = numpy.array(y)
        t = numpy.array(t)
        num_items = len(list_of_log_dict)
        multiplier = min(num_items, MAX_SAMPLES_PER_PLOT)
        incr_inds = [r * num_items / multiplier for r in range(multiplier)]
        max_inds = list(numpy.argsort(-y)[0:multiplier])
        final_inds = sorted(set(incr_inds + max_inds))
        x = list(x[final_inds])
        y = list(y[final_inds])
        t = list(t[final_inds])

        alg_dict = {'legend_label': alg_label, 'x': x, 'y': y, 't': t}
        try:
            x_min = min(x_min, min(x))
            x_max = max(x_max, max(x))
            y_min = min(y_min, min(y))
            y_max = max(y_max, max(y))
        except:
            pass
        list_of_alg_dicts.append(alg_dict)

    return_dict = {}
    return_dict['data'] = list_of_alg_dicts
    return_dict['plot_type'] = 'multi_line_plot'
    return_dict['x_label'] = 'API Call'
    return_dict['x_min'] = x_min
    return_dict['x_max'] = x_max
    return_dict['y_label'] = 'Duration (s)'
    return_dict['y_min'] = y_min
    return_dict['y_max'] = y_max

    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    for alg_dict in list_of_alg_dicts:
        ax.plot(alg_dict['x'], alg_dict['y'], label=alg_dict['legend_label'])
    ax.set_xlabel('API Call')
    ax.set_ylabel('Duration (s)')
    ax.set_xlim([x_min, x_max])
    ax.set_ylim([y_min, y_max])
    ax.grid(color='white', linestyle='solid')
    ax.set_title(task, size=14)
    legend = ax.legend(loc=2, ncol=3, mode="expand")
    for label in legend.get_texts():
        label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict