コード例 #1
0
ファイル: App.py プロジェクト: widoptimization-willett/NEXT
    def processAnswer(self, exp_uid, args_json):
        """
        Description: verifies a processAnswer request, forwards it to the app's
        processAnswer function and stores timing information on the query document.

        Expected input:
          (string) exp_uid : experiment identifier; in this method it is only written to the exception log entry
          (json) args_json : request arguments. After conversion and verification the code reads
            args['query_uid'] and, optionally, args['timestamp_answer_received'] and args['response_time']

        Expected output (comma separated):
          (JSON) object with an empty 'args' and 'meta' holding 'log_entry_durations', (bool) True, (str) ''

        Raises:
          Exception : on any failure, after the error has been logged under '<app_id>:APP-EXCEPTION'
        """
        try:
            args_dict = self.helper.convert_json(args_json)
            args_dict = verifier.verify(args_dict, self.reference_dict['processAnswer']['args'])
            # Update timing info in query
            query = self.butler.queries.get(uid=args_dict['args']['query_uid'])
            # NOTE(review): when 'timestamp_answer_received' is absent this is None and is handed
            # to utils.str2datetime as-is -- confirm the verifier always supplies a value.
            timestamp_answer_received = args_dict['args'].get('timestamp_answer_received', None)
            delta_datetime = utils.str2datetime(timestamp_answer_received) - \
                             utils.str2datetime(query['timestamp_query_generated'])
            round_trip_time = delta_datetime.total_seconds()
            response_time = float(args_dict['args'].get('response_time', 0.))

            query_update = self.call_app_fn(query['alg_label'], query['alg_id'], 'processAnswer', args_dict)
            # network_delay is whatever part of the round trip the participant did not spend answering
            query_update.update({'response_time': response_time,
                                 'network_delay': round_trip_time - response_time,
                                 'timestamp_answer_received': timestamp_answer_received
                                 })
            self.butler.queries.set_many(uid=args_dict['args']['query_uid'], key_value_dict=query_update)

            return json.dumps({'args': {}, 'meta': {'log_entry_durations': self.log_entry_durations}}), True, ''

        except Exception as error:
            # Only the traceback object is needed; it is printed after the error has been logged.
            exc_traceback = sys.exc_info()[2]
            full_error = str(traceback.format_exc())+'\n'+str(error)
            utils.debug_print("processAnswer Exception: " + full_error, color='red')
            log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer', 'error': full_error,
                         'timestamp': utils.datetimeNow(), 'args_json': args_json}
            self.butler.ell.log(self.app_id+':APP-EXCEPTION', log_entry)
            traceback.print_tb(exc_traceback)
            # Callers receive a plain Exception wrapping the original error, as before.
            raise Exception(error)
コード例 #2
0
ファイル: App.py プロジェクト: nextml/NEXT
    def processAnswer(self, exp_uid, args_json):
        """
        Description: verifies a processAnswer request, forwards it to the app's
        processAnswer function and stores timing information on the query document.

        Expected input:
          (string) exp_uid : experiment identifier; in this method it is only written to the exception log entry
          (json) args_json : request arguments. After conversion and verification the code reads
            args['query_uid'] and, optionally, args['timestamp_answer_received'] and args['response_time']

        Expected output (comma separated):
          (JSON) object with an empty 'args' and 'meta' holding 'log_entry_durations', (bool) True, (str) ''

        Raises:
          Exception : on any failure, after the error has been logged under '<app_id>:APP-EXCEPTION'
        """
        try:
            args_dict = self.helper.convert_json(args_json)
            args_dict = verifier.verify(args_dict, self.reference_dict['processAnswer']['args'])
            # Update timing info in query
            query = self.butler.queries.get(uid=args_dict['args']['query_uid'])
            # NOTE(review): when 'timestamp_answer_received' is absent this is None and is handed
            # to utils.str2datetime as-is -- confirm the verifier always supplies a value.
            timestamp_answer_received = args_dict['args'].get('timestamp_answer_received', None)
            delta_datetime = utils.str2datetime(timestamp_answer_received) - \
                             utils.str2datetime(query['timestamp_query_generated'])
            round_trip_time = delta_datetime.total_seconds()
            response_time = float(args_dict['args'].get('response_time', 0.))

            query_update = self.call_app_fn(query['alg_label'], query['alg_id'], 'processAnswer', args_dict)
            # network_delay is whatever part of the round trip the participant did not spend answering
            query_update.update({'response_time': response_time,
                                 'network_delay': round_trip_time - response_time,
                                 'timestamp_answer_received': timestamp_answer_received
                                 })
            self.butler.queries.set_many(uid=args_dict['args']['query_uid'], key_value_dict=query_update)

            return json.dumps({'args': {}, 'meta': {'log_entry_durations': self.log_entry_durations}}), True, ''

        except Exception as error:
            # Only the traceback object is needed; it is printed after the error has been logged.
            exc_traceback = sys.exc_info()[2]
            full_error = str(traceback.format_exc())+'\n'+str(error)
            utils.debug_print("processAnswer Exception: " + full_error, color='red')
            log_entry = {'exp_uid': exp_uid, 'task': 'processAnswer', 'error': full_error,
                         'timestamp': utils.datetimeNow(), 'args_json': args_json}
            self.butler.ell.log(self.app_id+':APP-EXCEPTION', log_entry)
            traceback.print_tb(exc_traceback)
            # Callers receive a plain Exception wrapping the original error, as before.
            raise Exception(error)
コード例 #3
0
ファイル: AppDashboard.py プロジェクト: Ahtidevin/NEXT-1
    def api_activity_histogram(self, app, butler):
        """
        Description: returns the data to plot all API activity (for all algorithms) in a
        histogram with respect to time for any task in {getQuery,processAnswer,predict}

        Expected input:
          app : object whose exp_uid selects the experiment
          butler : object giving access to the experiment's queries and admin record

        Expected output (in dict):
          (dict) MPLD3 plot dictionary; the axes are left empty when the experiment has no queries
        """
        queries = butler.queries.get(pattern={'exp_uid': app.exp_uid})
        start_date = utils.str2datetime(
            butler.admin.get(uid=app.exp_uid)['start_date'])
        # Seconds elapsed between the experiment start and the generation of each query.
        numerical_timestamps = [
            (utils.str2datetime(item['timestamp_query_generated']) -
             start_date).total_seconds() for item in (queries or [])
        ]
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#FFFFFF'),
                               figsize=(12, 1.5))
        try:
            if numerical_timestamps:
                # The bin count grows with the square root of the sample size, capped at 300.
                num_bins = min(
                    int(1 + 4 * numpy.sqrt(len(numerical_timestamps))), 300)
                ax.hist(numerical_timestamps,
                        num_bins,
                        alpha=0.5,
                        color='black')
            ax.set_frame_on(False)
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])
            ax.get_yaxis().set_visible(False)
            if numerical_timestamps:
                # max() of an empty list raises ValueError, so only set the limit when there is data.
                ax.set_xlim(0, max(numerical_timestamps))
            return mpld3.fig_to_dict(fig)
        finally:
            # Close this specific figure even when rendering fails, so figures do not accumulate.
            plt.close(fig)
コード例 #4
0
ファイル: dashboard.py プロジェクト: aybuketurker/NEXT-psych
def experiment_dashboard(exp_uid, app_id, exp_key):
    """
    Endpoint that renders the experiment dashboard.

    Inputs:
        (string) exp_uid, exp_uid for a current experiment.
        (string) app_id, selects the app's dashboard template package and the '<app_id>.html' template.
        exp_key, accepted but not used in this function.

    Outputs:
        the rendered dashboard template, as returned by template.render
    """
    # Not a particularly good way to do this.
    alg_label_list = rm.get_algs_for_exp_uid(exp_uid)

    exp_start_data = rm.get_app_exp_uid_start_date(exp_uid) + ' UTC'
    participant_uids = rm.get_participant_uids(exp_uid)
    num_participants = len(participant_uids)
    last_datetime = None
    num_queries = 0
    for participant_uid in participant_uids:
        queries = rm.get_participant_data(participant_uid, exp_uid)
        if not queries:
            # A participant with no queries has no "last query"; queries[-1] would raise IndexError.
            continue
        num_queries += len(queries)

        # The last entry is used as the participant's latest query
        # (presumably the list is in chronological order -- confirm against rm).
        this_datetime = utils.str2datetime(
            queries[-1]['timestamp_query_generated'])
        if last_datetime is None or this_datetime > last_datetime:
            last_datetime = this_datetime
    if last_datetime is None:
        # No participant has generated a query yet, so there is no timestamp to format.
        last_activity = 'N/A'
    else:
        last_activity = utils.datetime2str(last_datetime) + ' UTC'

    # Migrate this code to use keychain
    docs, didSucceed, message = db.getDocsByPattern('next_frontend_base',
                                                    'keys', {
                                                        'object_id': exp_uid,
                                                        'type': 'perm'
                                                    })
    perm_key = docs[0]['_id']
    alg_list = [{
        'alg_label': alg['alg_label'],
        # Whitespace-free variant of the label (runs of whitespace become a single underscore).
        'alg_label_clean': '_'.join(alg['alg_label'].split())
    } for alg in alg_label_list]

    if (constants.NEXT_BACKEND_GLOBAL_HOST
            and constants.NEXT_BACKEND_GLOBAL_PORT):
        host_url = 'http://{}:{}'.format(constants.NEXT_BACKEND_GLOBAL_HOST,
                                         constants.NEXT_BACKEND_GLOBAL_PORT)
    else:
        host_url = ''
    # Single-argument print calls produce the same output under Python 2 and Python 3.
    print('host_url {}'.format(host_url))
    print('{} {}'.format(constants.NEXT_BACKEND_GLOBAL_HOST,
                         constants.NEXT_BACKEND_GLOBAL_PORT))
    # Templates are looked up in the app's own dashboard package first, then in the shared ones.
    env = Environment(loader=ChoiceLoader([
        PackageLoader('next.apps.{}'.format(app_id), 'dashboard'),
        PackageLoader('next.dashboard', 'templates')
    ]))
    template = env.get_template('{}.html'.format(app_id))
    return template.render(app_id=app_id,
                           exp_uid=exp_uid,
                           alg_list=alg_list,
                           host_url=host_url,
                           perm_key=perm_key,
                           url_for=url_for,
                           exp_start_data=exp_start_data,
                           num_participants=num_participants,
                           num_queries=num_queries,
                           last_activity=last_activity)
コード例 #5
0
ファイル: AppDashboard.py プロジェクト: dconathan/NEXT
  def api_activity_histogram(self, app, butler):
    """
    Description: returns the data to plot all API activity (for all algorithms) in a
    histogram with respect to time for any task in {getQuery,processAnswer,predict}

    Expected input:
      app : object whose exp_uid selects the experiment
      butler : object giving access to the experiment's queries and admin record

    Expected output (in dict):
      (dict) MPLD3 plot dictionary; the axes are left empty when the experiment has no queries
    """
    queries = butler.queries.get(pattern={'exp_uid':app.exp_uid})
    start_date = utils.str2datetime(butler.admin.get(uid=app.exp_uid)['start_date'])
    # Seconds elapsed between the experiment start and the generation of each query.
    numerical_timestamps = [(utils.str2datetime(item['timestamp_query_generated'])-start_date).total_seconds()
                                for item in (queries or [])]
    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#FFFFFF'),figsize=(12,1.5))
    try:
      if numerical_timestamps:
        # The bin count grows with the square root of the sample size, capped at 300.
        num_bins = min(int(1+4*numpy.sqrt(len(numerical_timestamps))),300)
        ax.hist(numerical_timestamps,num_bins,alpha=0.5,color='black')
      ax.set_frame_on(False)
      ax.get_xaxis().set_ticks([])
      ax.get_yaxis().set_ticks([])
      ax.get_yaxis().set_visible(False)
      if numerical_timestamps:
        # max() of an empty list raises ValueError, so only set the limit when there is data.
        ax.set_xlim(0, max(numerical_timestamps))
      return mpld3.fig_to_dict(fig)
    finally:
      # Close this specific figure even when rendering fails, so figures do not accumulate.
      plt.close(fig)
コード例 #6
0
ファイル: resource_manager.py プロジェクト: yazici/NEXT
    def get_app_exp_uid_start_date(self,exp_uid):
        """
        Returns the date on which the experiment was initialized.

        The value is read with db.get('experiments_admin', exp_uid, 'start_date').
        A datetime is returned unchanged; any other value is converted with
        utils.str2datetime.

        Inputs:
            (string) exp_uid : unique experiment identifier

        Outputs:
            (datetime) start_date : start date in datetime format

        Usage:
            rm.get_app_exp_uid_start_date('PoolBasedTripletMDS')
        """
        stored_value = db.get('experiments_admin', exp_uid, 'start_date')
        already_datetime = isinstance(stored_value, datetime)
        return stored_value if already_datetime else utils.str2datetime(stored_value)
コード例 #7
0
ファイル: Dashboard.py プロジェクト: dconathan/NEXT
    def test_error_multiline_plot(self, app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        For every ALG-EVALUATION log entry of an algorithm the error is 1 - accuracy, where
        accuracy is the number of test queries carrying a 'q' triplet with a positive score
        divided by the total number of test queries. When there are no test queries the
        error defaults to 0.5.

        Expected input:
          app : object providing app_id and exp_uid
          butler : object giving access to experiment args, queries and the ell log

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        # get list of algorithms associated with project
        args = butler.experiment.get(key='args')
        test_alg_label = args['alg_list'][0]['test_alg_label']

        test_S = butler.queries.get(pattern={'exp_uid':app.exp_uid, 'alg_label':test_alg_label})
        # Builtin float replaces the numpy.float alias, which newer NumPy releases no longer provide.
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in args['alg_list']:
            alg_label = algorithm['alg_label']
            list_of_log_dict,didSucceed,message = butler.ell.get_logs_with_filter(app.app_id+':ALG-EVALUATION',{'exp_uid':app.exp_uid, 'alg_label':alg_label})
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )
            x = []
            y = []
            for item in list_of_log_dict:
                num_reported_answers = item['num_reported_answers']
                Xd = item['X']
                err = 0.5
                if len(test_S)>0:
                    # compute error rate
                    number_correct = 0.
                    for query in test_S:
                        if 'q' in query:
                            i, j, k = query['q']
                            # Algebraically |Xd[j]-Xd[k]|^2 - |Xd[i]-Xd[k]|^2: positive when i lies closer to k than j does.
                            score =  numpy.dot(Xd[j],Xd[j]) -2*numpy.dot(Xd[j],Xd[k]) + 2*numpy.dot(Xd[i],Xd[k]) - numpy.dot(Xd[i],Xd[i])
                            if score > 0:
                                number_correct += 1.0

                    accuracy = number_correct/len(test_S)
                    err = 1.0-accuracy
                x.append(num_reported_answers)
                y.append(err)
            alg_dict = {'legend_label':alg_label, 'x':x,'y':y}
            if x:
                # Only algorithms with at least one evaluation contribute to the axis limits.
                x_min = min(x_min,min(x))
                x_max = max(x_max,max(x))
                y_min = min(y_min,min(y))
                y_max = max(y_max,max(y))
            list_of_alg_dicts.append(alg_dict)

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        try:
            for alg_dict in list_of_alg_dicts:
                ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
            ax.set_xlabel('Number of answered triplets')
            ax.set_ylabel('Error on hold-out set')
            if x_min <= x_max:
                # False only when no data point was recorded (limits are still +/-inf);
                # in that case the default axis limits are kept.
                ax.set_xlim([x_min,x_max])
                ax.set_ylim([y_min,y_max])
            ax.grid(color='white', linestyle='solid')
            ax.set_title('Triplet Test Error', size=14)
            legend = ax.legend(loc=2,ncol=3,mode="expand")
            for label in legend.get_texts():
                label.set_fontsize('small')
            return mpld3.fig_to_dict(fig)
        finally:
            # Close this specific figure even when rendering fails.
            plt.close(fig)
コード例 #8
0
  def reportAnswer(self,exp_uid,args_json,db,ell):
    """
    reporting back the reward of pulling the arm suggested by getQuery

    Expected input:
      (str) exp_uid : experiment identifier
      (JSON) args_json : must contain
        (str) query_uid : unique identifier of query
        (int) index_winner : index of arm must be one of the indices given by getQuery
        and may contain 'response_time' and meta.timestamp_answer_received
      db : database handle used for get/set/increment/submit_job
      ell : logger handle; receives APP-CALL, APP-RESPONSE and APP-EXCEPTION entries

    Expected output (comma separated):
      if error:
        return (JSON) '{}', (bool) False, (str) error
      else:
        return (JSON) response with 'args' and 'meta', (bool) True,''

    Usage:
      reportAnswer_response_json,didSucceed,message = app.reportAnswer(exp_uid,reportAnswer_args_json,db,ell)

    Example input:
      reportAnswer_args_json = {"query_uid": "4d02a9924f92138287edd17ca5feb6e1", "index_winner": 8}
    """
    # Bound outside the try block so the exception handler can always use it.
    app_id = self.app_id

    try:
      log_entry = { 'exp_uid':exp_uid,'task':'reportAnswer','json':args_json,'timestamp':utils.datetimeNow() }
      ell.log( app_id+':APP-CALL', log_entry  )

      # convert args_json to args_dict
      try:
        args_dict = json.loads(args_json)
      except (TypeError, ValueError):
        error = "%s.reportAnswer input args_json is in improper format" % self.app_id
        return '{}',False,error

      # check for the fields that must be contained in args or error occurs
      necessary_fields = ['index_winner','query_uid']
      for field in necessary_fields:
        if field not in args_dict:
          error = "%s.reportAnswer input arguments missing field: %s" % (self.app_id,str(field))
          return '{}',False,error

      # get list of algorithms associated with project
      alg_list,didSucceed,message = db.get(app_id+':experiments',exp_uid,'alg_list')

      # get alg_id
      query_uid = args_dict['query_uid']
      alg_uid,didSucceed,message = db.get(app_id+':queries',query_uid,'alg_uid')
      algorithm_found = False
      for algorithm in alg_list:
        if alg_uid == algorithm['alg_uid']:
          algorithm_found = True
          alg_id = algorithm['alg_id']
          alg_label = algorithm['alg_label']
          num_reported_answers,didSucceed,message = db.increment(app_id+':experiments',exp_uid,'num_reported_answers_for_'+alg_uid)
      if not algorithm_found:
        # Without this check the failure would only show up later as a NameError on alg_id.
        raise Exception("No algorithm in alg_list matches alg_uid=%s of query_uid=%s" % (alg_uid,query_uid))

      # get sandboxed database for the specific app_id,alg_id,exp_uid - closing off the rest of the database to the algorithm
      rc = ResourceClient(app_id,exp_uid,alg_uid,db)

      # get specific algorithm to make calls to
      alg = utils.get_app_alg(self.app_id,alg_id)

      # get targets associated with the specific query
      targets,didSucceed,message = db.get(app_id+':queries',query_uid,'target_indices')
      target_indices = [target['index'] for target in targets]

      # get the index winner
      index_winner = args_dict['index_winner']

      # update query doc
      timestamp_query_generated,didSucceed,message = db.get(app_id+':queries',query_uid,'timestamp_query_generated')
      datetime_query_generated = utils.str2datetime(timestamp_query_generated)
      timestamp_answer_received = args_dict.get('meta',{}).get('timestamp_answer_received',None)
      if timestamp_answer_received is None:
        # No receive timestamp was supplied: treat the round trip as instantaneous.
        datetime_answer_received = datetime_query_generated
      else:
        datetime_answer_received = utils.str2datetime(timestamp_answer_received)
      delta_datetime = datetime_answer_received - datetime_query_generated
      # total_seconds() also accounts for the .days component, which seconds+microseconds ignores.
      round_trip_time = delta_datetime.total_seconds()
      response_time = float(args_dict.get('response_time',0.))
      db.set(app_id+':queries',query_uid,'response_time',response_time)
      db.set(app_id+':queries',query_uid,'network_delay',round_trip_time-response_time)
      db.set(app_id+':queries',query_uid,'index_winner',index_winner)

      # call reportAnswer
      didSucceed,dt = utils.timeit(alg.reportAnswer)(resource=rc,targets=target_indices,index_winner=index_winner)

      log_entry_durations = { 'exp_uid':exp_uid,'alg_uid':alg_uid,'task':'reportAnswer','duration':dt }
      log_entry_durations.update( rc.getDurations() )
      meta = {'log_entry_durations':log_entry_durations}

      # calling predict
      ###############
      predict_id = 'arm_ranking'
      params = {'alg_label':alg_label}
      predict_args_dict = {'predict_id':predict_id,'params':params}
      predict_args_json = json.dumps(predict_args_dict)

      db.submit_job(app_id,exp_uid,'predict',predict_args_json,ignore_result=True)
      ###############

      response_args_dict = {}
      args_out = {'args':response_args_dict,'meta':meta}
      response_json = json.dumps(args_out)

      log_entry = { 'exp_uid':exp_uid,'task':'reportAnswer','json':response_json,'timestamp':utils.datetimeNow() }
      ell.log( app_id+':APP-RESPONSE', log_entry  )

      return response_json,True,""
    except Exception:
      # Any failure is logged and reported to the caller as a failed result, never raised.
      error = traceback.format_exc()
      log_entry = { 'exp_uid':exp_uid,'task':'reportAnswer','error':error,'timestamp':utils.datetimeNow(),'args_json':args_json }
      ell.log( app_id+':APP-EXCEPTION', log_entry  )
      return '{}',False,error
コード例 #9
0
ファイル: AppDashboard.py プロジェクト: dconathan/NEXT
  def compute_duration_multiline_plot(self, app, butler, task):
    """
    Description: Returns multiline plot where there is a one-to-one mapping lines to
    algorithms and each line indicates the durations to complete the task (wrt to the api call)

    Each algorithm's ALG-DURATION log entries are sorted by timestamp and subsampled:
    up to MAX_SAMPLES_PER_PLOT evenly spaced entries are kept together with the same
    number of entries having the largest durations.

    Expected input:
      app : object providing app_id and exp_uid
      butler : object giving access to experiment args and the ell log
      (string) task :  must be in {'getQuery','processAnswer','predict'}

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """

    alg_list = butler.experiment.get(key='args')['alg_list']
    # Builtin float replaces the numpy.float alias, which newer NumPy releases no longer provide.
    x_min = float('inf')
    x_max = -float('inf')
    y_min = float('inf')
    y_max = -float('inf')
    list_of_alg_dicts = []

    for algorithm in alg_list:
      alg_label = algorithm['alg_label']
      list_of_log_dict,didSucceed,message = butler.ell.get_logs_with_filter(app.app_id+':ALG-DURATION',
                                                                            {'exp_uid':app.exp_uid,'alg_label':alg_label,'task':task})
      list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

      x = []
      y = []
      t = []
      # x is the 1-based position of the call in chronological order
      for k, item in enumerate(list_of_log_dict, 1):
        x.append(k)
        y.append( item.get('app_duration',0.) + item.get('duration_enqueued',0.) )
        t.append(str(item['timestamp'])[:-3])

      x = numpy.array(x)
      y = numpy.array(y)
      t = numpy.array(t)
      num_items = len(list_of_log_dict)
      multiplier = min(num_items,MAX_SAMPLES_PER_PLOT)
      # Floor division keeps the indices integral regardless of the division semantics in effect.
      incr_inds = [ r*num_items//multiplier for r in range(multiplier)]
      # indices of the `multiplier` largest durations, so spikes survive the subsampling
      max_inds = list(numpy.argsort(-y)[0:multiplier])
      final_inds = sorted(set(incr_inds + max_inds))
      x = list(x[final_inds])
      y = list(y[final_inds])
      t = list(t[final_inds])

      alg_dict = {}
      alg_dict['legend_label'] = alg_label
      alg_dict['x'] = x
      alg_dict['y'] = y
      alg_dict['t'] = t
      if x:
        # Only algorithms with at least one logged call contribute to the axis limits.
        x_min = min(x_min,min(x))
        x_max = max(x_max,max(x))
        y_min = min(y_min,min(y))
        y_max = max(y_max,max(y))

      list_of_alg_dicts.append(alg_dict)

    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    try:
      for alg_dict in list_of_alg_dicts:
        ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
      ax.set_xlabel('API Call')
      ax.set_ylabel('Duration (s)')
      if x_min <= x_max:
        # False only when no data point was recorded (limits are still +/-inf);
        # in that case the default axis limits are kept.
        ax.set_xlim([x_min,x_max])
        ax.set_ylim([y_min,y_max])
      ax.grid(color='white', linestyle='solid')
      ax.set_title(task, size=14)
      legend = ax.legend(loc=2,ncol=3,mode="expand")
      for label in legend.get_texts():
        label.set_fontsize('small')
      return mpld3.fig_to_dict(fig)
    finally:
      # Close this specific figure even when rendering fails.
      plt.close(fig)
コード例 #10
0
ファイル: AppDashboard.py プロジェクト: Ahtidevin/NEXT-1
    def compute_duration_detailed_stacked_area_plot(self,
                                                    app,
                                                    butler,
                                                    task,
                                                    alg_label,
                                                    detailedDB=False):
        """
        Description: Returns stacked area plot for a particular algorithm and task where the durations
        are broken down into compute, dbGet, dbSet, admin and enqueued time

        The ALG-DURATION log entries are sorted by timestamp and subsampled: up to
        MAX_SAMPLES_PER_PLOT evenly spaced entries are kept together with the same
        number of entries having the largest total duration.

        Expected input:
          app : object providing app_id and exp_uid
          butler : object giving access to the ell log
          (string) task :  must be in {'getQuery','processAnswer','predict'}
          (string) alg_label : must be a valid alg_label contained in alg_list list of dicts
          detailedDB : accepted but not used in this method

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        list_of_log_dict = butler.ell.get_logs_with_filter(
            app.app_id + ':ALG-DURATION', {
                'exp_uid': app.exp_uid,
                'alg_label': alg_label,
                'task': task
            })
        list_of_log_dict = sorted(
            list_of_log_dict,
            key=lambda item: utils.str2datetime(item['timestamp']))

        # Total duration per call, used only to pick the slowest calls below.
        y = numpy.array([
            item.get('app_duration', 0.) + item.get('duration_enqueued', 0.)
            for item in list_of_log_dict
        ])
        num_items = len(list_of_log_dict)
        multiplier = min(num_items, MAX_SAMPLES_PER_PLOT)
        # Floor division keeps the indices integral regardless of the division semantics in effect.
        incr_inds = [k * num_items // multiplier for k in range(multiplier)]
        max_inds = list(numpy.argsort(-y)[0:multiplier])
        final_inds = sorted(set(incr_inds + max_inds))

        x = []
        t = []
        enqueued = []
        admin = []
        dbGet = []
        dbSet = []
        compute = []

        max_y_value = 0.
        for idx in final_inds:
            item = list_of_log_dict[idx]
            x.append(idx + 1)
            t.append(str(item.get('timestamp', '')))

            _alg_duration = item.get('duration', 0.)
            _alg_duration_dbGet = item.get('duration_dbGet', 0.)
            _alg_duration_dbSet = item.get('duration_dbSet', 0.)
            _duration_enqueued = item.get('duration_enqueued', 0.)
            _app_duration = item.get('app_duration', 0.)

            # The tallest stack determines the upper y limit.
            if (_app_duration + _duration_enqueued) > max_y_value:
                max_y_value = _app_duration + _duration_enqueued

            enqueued.append(_duration_enqueued)
            # admin = app time not spent inside the algorithm
            admin.append(_app_duration - _alg_duration)
            dbSet.append(_alg_duration_dbSet)
            dbGet.append(_alg_duration_dbGet)
            # compute = algorithm time not spent on database gets/sets
            compute.append(_alg_duration - _alg_duration_dbSet -
                           _alg_duration_dbGet)

        if x:
            min_x = min(x)
            max_x = max(x)
        else:
            # No log entries: fall back to a degenerate x range.
            min_x = 0.
            max_x = 0.

        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        try:
            stack_coll = ax.stackplot(x,
                                      compute,
                                      dbGet,
                                      dbSet,
                                      admin,
                                      enqueued,
                                      alpha=.5)
            ax.set_xlabel('API Call')
            ax.set_ylabel('Duration (s)')
            ax.set_xlim([min_x, max_x])
            ax.set_ylim([0., max_y_value])
            ax.grid(color='white', linestyle='solid')
            ax.set_title(alg_label + ' - ' + task, size=14)
            # The legend is built from rectangles that reuse each stacked area's face colour.
            proxy_rects = [
                plt.Rectangle((0, 0), 1, 1, alpha=.5, fc=pc.get_facecolor()[0])
                for pc in stack_coll
            ]
            legend = ax.legend(proxy_rects,
                               ['compute', 'dbGet', 'dbSet', 'admin', 'enqueued'],
                               loc=2,
                               ncol=3,
                               mode="expand")
            for label in legend.get_texts():
                label.set_fontsize('small')
            return mpld3.fig_to_dict(fig)
        finally:
            # Close this specific figure even when rendering fails.
            plt.close(fig)
コード例 #11
0
  def processAnswer(self,exp_uid,args_json,db,ell):
    """
    reporting back the reward of pulling the arm suggested by getQuery

    Expected input:
      (str) exp_uid : experiment identifier
      (JSON) args_json : structure with string keys that must contain
        (index) index_winner : index of the winner in (must be index of left or right target in target_indices)
        (str) query_uid : unique identifier of query
        and may contain 'response_time' and meta.timestamp_answer_received
      db : database handle used for get/set/increment/submit_job
      ell : logger handle; receives APP-CALL, APP-RESPONSE and APP-EXCEPTION entries

    Expected output (comma separated):
      if error:
        return (JSON) '{}', (bool) False, (str) error
      else:
        return (JSON) response with 'args' and 'meta', (bool) True,''
    """
    # Bound outside the try block so the exception handler can always use it.
    app_id = self.app_id

    try:
      log_entry = { 'exp_uid':exp_uid,'task':'processAnswer','json':args_json,'timestamp':utils.datetimeNow() }
      ell.log( app_id+':APP-CALL', log_entry  )

      # convert args_json to args_dict
      try:
        args_dict = json.loads(args_json)
      except (TypeError, ValueError):
        error = "%s.processAnswer input args_json is in improper format" % self.app_id
        return '{}',False,error

      # check for the fields that must be contained in args or error occurs
      necessary_fields = ['index_winner','query_uid']
      for field in necessary_fields:
        if field not in args_dict:
          error = "%s.processAnswer input arguments missing field: %s" % (self.app_id,str(field))
          return '{}',False,error

      # get list of algorithms associated with project
      alg_list,didSucceed,message = db.get(app_id+':experiments',exp_uid,'alg_list')

      # get alg_id
      query_uid = args_dict['query_uid']
      alg_uid,didSucceed,message = db.get(app_id+':queries',query_uid,'alg_uid')
      if not didSucceed:
        raise Exception("Failed to retrieve query with query_uid="+query_uid)
      algorithm_found = False
      for algorithm in alg_list:
        if alg_uid == algorithm['alg_uid']:
          algorithm_found = True
          alg_id = algorithm['alg_id']
          alg_label = algorithm['alg_label']
          test_alg_label = algorithm['test_alg_label']
          num_reported_answers,didSucceed,message = db.increment(app_id+':experiments',exp_uid,'num_reported_answers_for_'+alg_uid)
      if not algorithm_found:
        # Without this check the failure would only show up later as a NameError on alg_id.
        raise Exception("No algorithm in alg_list matches alg_uid=%s of query_uid=%s" % (alg_uid,query_uid))

      # get sandboxed database for the specific app_id,alg_id,exp_uid - closing off the rest of the database to the algorithm
      rc = ResourceClient(app_id,exp_uid,alg_uid,db)

      # get specific algorithm to make calls to
      alg = utils.get_app_alg(self.app_id,alg_id)

      targets,didSucceed,message = db.get(app_id+':queries',query_uid,'target_indices')
      index_center = index_left = index_right = None
      for target in targets:
        if target['label'] == 'center':
          index_center = target['index']
        elif target['label'] == 'left':
          index_left = target['index']
        elif target['label'] == 'right':
          index_right = target['index']
      if None in (index_center,index_left,index_right):
        # All three labelled targets are needed below; fail with a clear message instead of a NameError.
        raise Exception("Query with query_uid=%s lacks a 'center', 'left' or 'right' target" % query_uid)

      index_winner = args_dict['index_winner']

      # update query doc
      timestamp_query_generated,didSucceed,message = db.get(app_id+':queries',query_uid,'timestamp_query_generated')
      datetime_query_generated = utils.str2datetime(timestamp_query_generated)
      timestamp_answer_received = args_dict.get('meta',{}).get('timestamp_answer_received',None)
      if timestamp_answer_received is None:
        # No receive timestamp was supplied: treat the round trip as instantaneous.
        datetime_answer_received = datetime_query_generated
      else:
        datetime_answer_received = utils.str2datetime(timestamp_answer_received)
      delta_datetime = datetime_answer_received - datetime_query_generated
      # total_seconds() also accounts for the .days component, which seconds+microseconds ignores.
      round_trip_time = delta_datetime.total_seconds()
      response_time = float(args_dict.get('response_time',0.))
      db.set(app_id+':queries',query_uid,'response_time',response_time)
      db.set(app_id+':queries',query_uid,'network_delay',round_trip_time-response_time)
      db.set(app_id+':queries',query_uid,'index_winner',index_winner)
      # q is stored as [winner, loser, center]; left is taken as the winner unless right was chosen
      if index_winner==index_right:
        q = [index_right,index_left,index_center]
      else:
        q = [index_left,index_right,index_center]
      db.set(app_id+':queries',query_uid,'q',q)

      # call processAnswer
      didSucceed,dt = utils.timeit(alg.processAnswer)(resource=rc,index_center=index_center,index_left=index_left,index_right=index_right,index_winner=index_winner)

      log_entry_durations = { 'exp_uid':exp_uid,'alg_uid':alg_uid,'task':'processAnswer','duration':dt }
      log_entry_durations.update( rc.getDurations() )
      meta = {'log_entry_durations':log_entry_durations}

      # check if we're going to evaluate this loss
      n,didSucceed,message = db.get(app_id+':experiments',exp_uid,'n')

      # Floor division spells out the integer division the original relied on
      # (assumes n is an int -- TODO confirm); a predict job is submitted every (n+4)//4 answers.
      if num_reported_answers % ((n+4)//4) == 0:
        predict_id = 'get_embedding'
        params = {'alg_label':alg_label}
        predict_args_dict = {'predict_id':predict_id,'params':params}
        predict_args_json = json.dumps(predict_args_dict)

        db.submit_job(app_id,exp_uid,'predict',predict_args_json,ignore_result=True)
      ###############

      response_args_dict = {}
      args_out = {'args':response_args_dict,'meta':meta}
      response_json = json.dumps(args_out)

      log_entry = { 'exp_uid':exp_uid,'task':'processAnswer','json':response_json,'timestamp':utils.datetimeNow() }
      ell.log( app_id+':APP-RESPONSE', log_entry  )

      return response_json,True,""
    except Exception:
      # Any failure is logged and reported to the caller as a failed result, never raised.
      error = traceback.format_exc()
      log_entry = { 'exp_uid':exp_uid,'task':'processAnswer','error':error,'timestamp':utils.datetimeNow() }
      ell.log( app_id+':APP-EXCEPTION', log_entry  )
      return '{}',False,error
コード例 #12
0
ファイル: Dashboard.py プロジェクト: mcomsa/NEXT
    def test_error_multiline_plot(self, app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line shows the logged 'precision' (as an integer percentage)
        with respect to number of reported answers

        Expected input:
          app : object exposing app_id and exp_uid, used to filter the logs
          butler : object exposing experiment.get and db.get_docs_with_filter

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        print('\n in multi line plot functino')
        args = butler.experiment.get(key='args')
        alg_list = args['alg_list']
        test_alg_label = alg_list[0]['test_alg_label']

        # NOTE(review): the result of this lookup is not used anywhere below;
        # the call is kept so the method's database access pattern is unchanged.
        test_queries, didSucceed, message = butler.db.get_docs_with_filter(
            app.app_id + ':queries', {
                'exp_uid': app.exp_uid,
                'alg_label': test_alg_label
            })

        # Builtin float is used instead of the numpy.float alias, which newer
        # NumPy releases no longer provide.
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            print('\n doing something for :', algorithm)
            alg_label = algorithm['alg_label']
            list_of_log_dict, didSucceed, message = self.ell.get_logs_with_filter(
                app.app_id + ':ALG-EVALUATION', {
                    'exp_uid': app.exp_uid,
                    'alg_label': alg_label
                })
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))
            x = []
            y = []

            for item in list_of_log_dict:
                print('\n calculating ... :', algorithm)
                num_reported_answers = item['num_reported_answers']
                precision = item['precision']

                err = int(precision * 100)
                x.append(num_reported_answers)
                y.append(err)

            # Order the points by number of reported answers. The permutation
            # lives in its own name so that x keeps its values and y is
            # reordered with the very same permutation.
            order = numpy.argsort(x)
            x = [x[i] for i in order]
            y = [y[i] for i in order]

            alg_dict = {}
            alg_dict['legend_label'] = alg_label
            alg_dict['x'] = x
            alg_dict['y'] = y
            # An algorithm without evaluation logs contributes nothing to the
            # axis limits (min/max of an empty list would raise).
            if x:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))

            list_of_alg_dicts.append(alg_dict)

        import matplotlib.pyplot as plt
        import mpld3

        # vertical padding added below/above the extreme y values
        width = 0.8
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        for alg_dict in list_of_alg_dicts:
            ax.plot(alg_dict['x'],
                    alg_dict['y'],
                    label=alg_dict['legend_label'])
        ax.set_xlabel('Number of held out examples (#)')
        ax.set_ylabel('Accuracy (%)')
        ax.set_xlim([x_min, x_max])
        ax.set_ylim([y_min - width, y_max + width])
        ax.grid(color='white', linestyle='solid')
        ax.set_title(
            'Product Classification Accuracy on Held Out Examples (higher is better)',
            size=14)
        legend = ax.legend(loc=2, ncol=3, mode="expand")
        for label in legend.get_texts():
            label.set_fontsize('small')
        plot_dict = mpld3.fig_to_dict(fig)
        plt.close()

        return plot_dict
コード例 #13
0
    def processAnswer(self, exp_uid, args_json, db, ell):
        """
        Report back the answer to a query suggested by getQuery: records timing
        information on the query document, forwards the answer to the algorithm
        that generated the query and submits a follow-up 'predict' job.

        Expected input (in json structure with string keys):
          (str) query_uid : unique identifier of query
          (int) index_winner : index of arm must be {index_left,index_right}
          (float) response_time : optional, defaults to 0.
          (dict) meta : optional, may hold (str) timestamp_answer_received

        Expected output (comma separated):
          if error:
            return (JSON) '{}', (bool) False, (str) error
          else:
            return (JSON) response, (bool) True, ''

        Usage:
          response_json,didSucceed,message = app.processAnswer(exp_uid,processAnswer_args_json,db,ell)

        Example input:
          processAnswer_args_json = {"query_uid": "4d02a9924f92138287edd17ca5feb6e1", "index_winner": 8}

        Example output:
          response_json = {"args": {}, "meta": {"log_entry_durations": {...}}}
        """
        # Bound before the try block so the exception handler can always use it.
        app_id = self.app_id

        try:
            log_entry = {
                'exp_uid': exp_uid,
                'task': 'processAnswer',
                'json': args_json,
                'timestamp': utils.datetimeNow()
            }
            ell.log(app_id + ':APP-CALL', log_entry)

            # convert args_json to args_dict
            try:
                args_dict = json.loads(args_json)
            except (TypeError, ValueError):
                # json.loads raises TypeError for non-string input and
                # ValueError for malformed JSON
                error = "%s.processAnswer input args_json is in improper format" % self.app_id
                return '{}', False, error

            # check for the fields that must be contained in args or error occurs
            necessary_fields = ['index_winner', 'query_uid']
            for field in necessary_fields:
                try:
                    args_dict[field]
                except KeyError:
                    error = "%s.processAnswer input arguments missing field: %s" % (
                        self.app_id, str(field))
                    return '{}', False, error

            # get list of algorithms associated with project
            alg_list, didSucceed, message = db.get(app_id + ':experiments',
                                                   exp_uid, 'alg_list')

            # get alg_id
            query_uid = args_dict['query_uid']
            alg_uid, didSucceed, message = db.get(app_id + ':queries',
                                                  query_uid, 'alg_uid')
            # NOTE: if no entry of alg_list matches alg_uid, alg_id/alg_label stay
            # unbound and the resulting NameError is reported by the handler below.
            for algorithm in alg_list:
                if alg_uid == algorithm['alg_uid']:
                    alg_id = algorithm['alg_id']
                    alg_label = algorithm['alg_label']
                    num_reported_answers, didSucceed, message = db.increment(
                        app_id + ':experiments', exp_uid,
                        'num_reported_answers_for_' + alg_uid)

            # get sandboxed database for the specific app_id,alg_id,exp_uid - closing off the rest of the database to the algorithm
            rc = ResourceClient(app_id, exp_uid, alg_uid, db)

            # get specific algorithm to make calls to
            alg = utils.get_app_alg(self.app_id, alg_id)

            # recover which arm was shown left/right and which one was flagged
            targets, didSucceed, message = db.get(app_id + ':queries',
                                                  query_uid, 'target_indices')
            for target in targets:
                if target['label'] == 'left':
                    index_left = target['index']
                if target['label'] == 'right':
                    index_right = target['index']
                if target['flag'] == 1:
                    index_painted = target['index']

            index_winner = args_dict['index_winner']

            # update query doc
            timestamp_query_generated, didSucceed, message = db.get(
                app_id + ':queries', query_uid, 'timestamp_query_generated')
            datetime_query_generated = utils.str2datetime(
                timestamp_query_generated)
            timestamp_answer_received = args_dict.get('meta', {}).get(
                'timestamp_answer_received', None)
            if timestamp_answer_received is None:
                # no client timestamp: treat the round trip as instantaneous
                datetime_answer_received = datetime_query_generated
            else:
                datetime_answer_received = utils.str2datetime(
                    timestamp_answer_received)
            delta_datetime = datetime_answer_received - datetime_query_generated
            # total_seconds() also accounts for the days component, which
            # seconds + microseconds alone drops (negative or >1 day deltas)
            round_trip_time = delta_datetime.total_seconds()
            response_time = float(args_dict.get('response_time', 0.))
            db.set(app_id + ':queries', query_uid, 'response_time',
                   response_time)
            db.set(app_id + ':queries', query_uid, 'network_delay',
                   round_trip_time - response_time)
            db.set(app_id + ':queries', query_uid, 'index_winner',
                   index_winner)

            # call processAnswer
            didSucceed, dt = utils.timeit(alg.processAnswer)(
                resource=rc,
                index_left=index_left,
                index_right=index_right,
                index_painted=index_painted,
                index_winner=index_winner)

            log_entry_durations = {
                'exp_uid': exp_uid,
                'alg_uid': alg_uid,
                'task': 'processAnswer',
                'duration': dt
            }
            log_entry_durations.update(rc.getDurations())
            meta = {'log_entry_durations': log_entry_durations}

            ###############
            # submit a 'predict' job for this algorithm; its result is ignored
            predict_id = 'arm_ranking'
            params = {'alg_label': alg_label}
            predict_args_dict = {'predict_id': predict_id, 'params': params}
            predict_args_json = json.dumps(predict_args_dict)

            db.submit_job(app_id,
                          exp_uid,
                          'predict',
                          predict_args_json,
                          ignore_result=True)
            ###############

            response_args_dict = {}
            args_out = {'args': response_args_dict, 'meta': meta}
            response_json = json.dumps(args_out)

            log_entry = {
                'exp_uid': exp_uid,
                'task': 'processAnswer',
                'json': response_json,
                'timestamp': utils.datetimeNow()
            }
            ell.log(app_id + ':APP-RESPONSE', log_entry)

            return response_json, True, ""
        except Exception:
            # Any failure is logged with its traceback and reported to the
            # caller through the (json, didSucceed, message) triple.
            error = traceback.format_exc()
            log_entry = {
                'exp_uid': exp_uid,
                'task': 'processAnswer',
                'error': error,
                'timestamp': utils.datetimeNow(),
                'args_json': args_json
            }
            ell.log(app_id + ':APP-EXCEPTION', log_entry)
            return '{}', False, error
コード例 #14
0
ファイル: Dashboard.py プロジェクト: ayonsn017/NEXT_Chem
    def test_error_multiline_plot(self, app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          app : object exposing app_id and exp_uid, used to filter queries and logs
          butler : object exposing experiment.get, db.get_docs_with_filter and targets.get_targetset

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        args = butler.experiment.get(key='args')
        alg_list = args['alg_list']
        test_alg_label = alg_list[0]['test_alg_label']

        test_queries = butler.db.get_docs_with_filter(
            app.app_id + ':queries', {
                'exp_uid': app.exp_uid,
                'alg_label': test_alg_label
            })

        # held-out examples as (target_index, target_label) pairs
        test_S = [(query['target_index'], query['target_label'])
                  for query in test_queries if 'target_index' in query]

        targets = butler.targets.get_targetset(app.exp_uid)
        targets = sorted(targets, key=lambda x: x['target_id'])

        # Each feature vector gets a trailing constant 1. The vector is copied
        # first so the lists held by the target documents are not modified.
        target_features = [
            list(target['meta']['features']) + [1.] for target in targets
        ]

        # Builtin float is used instead of the numpy.float alias, which newer
        # NumPy releases no longer provide.
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_label = algorithm['alg_label']
            list_of_log_dict = self.ell.get_logs_with_filter(
                app.app_id + ':ALG-EVALUATION', {
                    'exp_uid': app.exp_uid,
                    'alg_label': alg_label
                })
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))
            x = []
            y = []
            for item in list_of_log_dict:
                num_reported_answers = item['num_reported_answers']
                weights = numpy.array(item['weights'])

                err = 0.
                for q in test_S:
                    estimated_label = numpy.sign(
                        numpy.dot(numpy.array(target_features[q[0]]),
                                  weights))
                    # counts 1 when the estimated and the recorded label
                    # have opposite signs
                    err += estimated_label * q[1] < 0.

                # NOTE: with no held-out examples this division raises
                # ZeroDivisionError.
                m = float(len(test_S))
                err = err / m
                x.append(num_reported_answers)
                y.append(err)

            # Order the points by number of reported answers. The permutation
            # lives in its own name so that x keeps its values and y is
            # reordered with the very same permutation.
            order = numpy.argsort(x)
            x = [x[i] for i in order]
            y = [y[i] for i in order]

            alg_dict = {}
            alg_dict['legend_label'] = alg_label
            alg_dict['x'] = x
            alg_dict['y'] = y
            # An algorithm without evaluation logs contributes nothing to the
            # axis limits (min/max of an empty list would raise).
            if x:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))

            list_of_alg_dicts.append(alg_dict)

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        for alg_dict in list_of_alg_dicts:
            ax.plot(alg_dict['x'],
                    alg_dict['y'],
                    label=alg_dict['legend_label'])
        ax.set_xlabel('Number of answered queries')
        ax.set_ylabel('Error on hold-out set')
        ax.set_xlim([x_min, x_max])
        ax.set_ylim([y_min, y_max])
        ax.grid(color='white', linestyle='solid')
        ax.set_title('Test Error', size=14)
        legend = ax.legend(loc=2, ncol=3, mode="expand")
        for label in legend.get_texts():
            label.set_fontsize('small')
        plot_dict = mpld3.fig_to_dict(fig)
        plt.close()

        return plot_dict
コード例 #15
0
    def test_error_multiline_plot(self, app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          app : object exposing app_id and exp_uid, used to filter queries and logs
          butler : object exposing experiment.get, queries.get and ell.get_logs_with_filter

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        # get list of algorithms associated with project
        args = butler.experiment.get(key='args')
        test_alg_label = args['alg_list'][0]['test_alg_label']

        # held-out queries, answered through the dedicated test algorithm
        test_S = butler.queries.get(pattern={
            'exp_uid': app.exp_uid,
            'alg_label': test_alg_label
        })
        # Builtin float is used instead of the numpy.float alias, which newer
        # NumPy releases no longer provide.
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in args['alg_list']:
            alg_label = algorithm['alg_label']
            list_of_log_dict, didSucceed, message = butler.ell.get_logs_with_filter(
                app.app_id + ':ALG-EVALUATION', {
                    'exp_uid': app.exp_uid,
                    'alg_label': alg_label
                })
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))
            x = []
            y = []
            for item in list_of_log_dict:
                num_reported_answers = item['num_reported_answers']
                Xd = item['X']
                # error reported when there are no held-out queries to score
                err = 0.5
                if len(test_S) > 0:
                    # compute error rate
                    number_correct = 0.
                    for query in test_S:
                        if 'q' in query:
                            i, j, k = query['q']
                            # equals ||Xd[j]-Xd[k]||^2 - ||Xd[i]-Xd[k]||^2
                            # (the Xd[k].Xd[k] terms cancel)
                            score = numpy.dot(Xd[j], Xd[j]) - 2 * numpy.dot(
                                Xd[j], Xd[k]) + 2 * numpy.dot(
                                    Xd[i], Xd[k]) - numpy.dot(Xd[i], Xd[i])
                            if score > 0:
                                number_correct += 1.0

                    # queries without a 'q' field still count in the denominator
                    accuracy = number_correct / len(test_S)
                    err = 1.0 - accuracy
                x.append(num_reported_answers)
                y.append(err)
            alg_dict = {'legend_label': alg_label, 'x': x, 'y': y}
            # An algorithm without evaluation logs contributes nothing to the
            # axis limits (min/max of an empty list would raise).
            if x:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))
            list_of_alg_dicts.append(alg_dict)

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        for alg_dict in list_of_alg_dicts:
            ax.plot(alg_dict['x'],
                    alg_dict['y'],
                    label=alg_dict['legend_label'])
        ax.set_xlabel('Number of answered triplets')
        ax.set_ylabel('Error on hold-out set')
        ax.set_xlim([x_min, x_max])
        ax.set_ylim([y_min, y_max])
        ax.grid(color='white', linestyle='solid')
        ax.set_title('Triplet Test Error', size=14)
        legend = ax.legend(loc=2, ncol=3, mode="expand")
        for label in legend.get_texts():
            label.set_fontsize('small')
        plot_dict = mpld3.fig_to_dict(fig)
        plt.close()
        return plot_dict
コード例 #16
0
ファイル: AppDashboard.py プロジェクト: dconathan/NEXT
  def compute_duration_detailed_stacked_area_plot(self,app,butler,task,alg_label,detailedDB=False):
    """
    Description: Returns stacked area plot for a particular algorithm and task where the durations
    are broken down into compute,dbGet,dbSet,admin,enqueued

    Expected input:
      (string) task :  must be in {'getQuery','processAnswer','predict'}
      (string) alg_label : must be a valid alg_label contained in alg_list list of dicts
      (bool) detailedDB : accepted but not used by this method

    Expected output (in dict):
      (dict) MPLD3 plot dictionary
    """
    list_of_log_dict,didSucceed,message = butler.ell.get_logs_with_filter(app.app_id+':ALG-DURATION',
                                                                          {'exp_uid':app.exp_uid,'alg_label':alg_label,'task':task})
    list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )

    # total duration of every call, used to pick which calls get plotted
    y = numpy.array([ item.get('app_duration',0.) + item.get('duration_enqueued',0.) for item in list_of_log_dict ])

    # Subsample: evenly spaced calls plus the slowest calls. Floor division keeps
    # the evenly spaced positions integral so they remain valid list indices.
    num_items = len(list_of_log_dict)
    multiplier = min(num_items,MAX_SAMPLES_PER_PLOT)
    incr_inds = [ k*num_items//multiplier for k in range(multiplier)]
    max_inds = list(numpy.argsort(-y)[0:multiplier])
    final_inds = sorted(set(incr_inds + max_inds))

    x = []
    enqueued = []
    admin = []
    dbGet = []
    dbSet = []
    compute = []

    max_y_value = 0.
    for idx in final_inds:
      item = list_of_log_dict[idx]
      x.append(idx+1)

      _alg_duration = item.get('duration',0.)
      _alg_duration_dbGet = item.get('duration_dbGet',0.)
      _alg_duration_dbSet = item.get('duration_dbSet',0.)
      _duration_enqueued = item.get('duration_enqueued',0.)
      _app_duration = item.get('app_duration',0.)

      # the top of the stack is app_duration + duration_enqueued
      if (_app_duration+_duration_enqueued) > max_y_value:
        max_y_value = _app_duration + _duration_enqueued

      enqueued.append(_duration_enqueued)
      admin.append(_app_duration-_alg_duration)
      dbSet.append(_alg_duration_dbSet)
      dbGet.append(_alg_duration_dbGet)
      compute.append( _alg_duration - _alg_duration_dbSet - _alg_duration_dbGet )

    # with no log entries there is nothing to take min/max of
    if x:
      min_x = min(x)
      max_x = max(x)
    else:
      min_x = 0.
      max_x = 0.

    fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
    stack_coll = ax.stackplot(x,compute,dbGet,dbSet,admin,enqueued, alpha=.5)
    ax.set_xlabel('API Call')
    ax.set_ylabel('Duration (s)')
    ax.set_xlim([min_x,max_x])
    ax.set_ylim([0.,max_y_value])
    ax.grid(color='white', linestyle='solid')
    ax.set_title(alg_label+' - '+task, size=14)
    # stackplot areas carry no legend handles, so matching rectangles are built for the legend
    proxy_rects = [plt.Rectangle((0, 0), 1, 1, alpha=.5,fc=pc.get_facecolor()[0]) for pc in stack_coll]
    legend = ax.legend(proxy_rects, ['compute','dbGet','dbSet','admin','enqueued'],loc=2,ncol=3,mode="expand")
    for label in legend.get_texts():
      label.set_fontsize('small')
    plot_dict = mpld3.fig_to_dict(fig)
    plt.close()
    return plot_dict
コード例 #17
0
ファイル: Dashboard.py プロジェクト: nextml/NEXT
    def test_error_multiline_plot(self,app, butler):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the error on the validation set with respect to number of reported answers

        Expected input:
          app : object exposing app_id and exp_uid, used to filter queries and logs
          butler : object exposing experiment.get, db.get_docs_with_filter and targets.get_targetset

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """
        args = butler.experiment.get(key='args')
        alg_list = args['alg_list']
        test_alg_label = alg_list[0]['test_alg_label']

        test_queries = butler.db.get_docs_with_filter(app.app_id+':queries',{'exp_uid':app.exp_uid, 'alg_label':test_alg_label})

        # held-out examples as (target_index, target_label) pairs
        test_S = [(query['target_index'], query['target_label'])
                            for query in test_queries
                            if 'target_index' in query]

        targets = butler.targets.get_targetset(app.exp_uid)
        targets = sorted(targets,key=lambda x: x['target_id'])

        # Each feature vector gets a trailing constant 1. The vector is copied
        # first so the lists held by the target documents are not modified.
        target_features = [list(target['meta']['features']) + [1.] for target in targets]

        # Builtin float is used instead of the numpy.float alias, which newer
        # NumPy releases no longer provide.
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_label = algorithm['alg_label']
            list_of_log_dict = self.ell.get_logs_with_filter(app.app_id+':ALG-EVALUATION',{'exp_uid':app.exp_uid, 'alg_label':alg_label})
            list_of_log_dict = sorted(list_of_log_dict, key=lambda item: utils.str2datetime(item['timestamp']) )
            x = []
            y = []
            for item in list_of_log_dict:
                num_reported_answers = item['num_reported_answers']
                weights = numpy.array(item['weights'])

                err = 0.
                for q in test_S:
                    estimated_label = numpy.sign(numpy.dot( numpy.array(target_features[q[0]]), weights ))
                    # counts 1 when the estimated and the recorded label have opposite signs
                    err += estimated_label*q[1]<0.

                # NOTE: with no held-out examples this division raises ZeroDivisionError.
                m = float(len(test_S))
                err = err/m
                x.append(num_reported_answers)
                y.append(err)

            # Order the points by number of reported answers. The permutation
            # lives in its own name so that x keeps its values and y is
            # reordered with the very same permutation.
            order = numpy.argsort(x)
            x = [x[i] for i in order]
            y = [y[i] for i in order]

            alg_dict = {}
            alg_dict['legend_label'] = alg_label
            alg_dict['x'] = x
            alg_dict['y'] = y
            # An algorithm without evaluation logs contributes nothing to the
            # axis limits (min/max of an empty list would raise).
            if x:
                x_min = min(x_min,min(x))
                x_max = max(x_max,max(x))
                y_min = min(y_min,min(y))
                y_max = max(y_max,max(y))

            list_of_alg_dicts.append(alg_dict)

        import matplotlib.pyplot as plt
        import mpld3
        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        for alg_dict in list_of_alg_dicts:
            ax.plot(alg_dict['x'],alg_dict['y'],label=alg_dict['legend_label'])
        ax.set_xlabel('Number of answered queries')
        ax.set_ylabel('Error on hold-out set')
        ax.set_xlim([x_min,x_max])
        ax.set_ylim([y_min,y_max])
        ax.grid(color='white', linestyle='solid')
        ax.set_title('Test Error', size=14)
        legend = ax.legend(loc=2,ncol=3,mode="expand")
        for label in legend.get_texts():
            label.set_fontsize('small')
        plot_dict = mpld3.fig_to_dict(fig)
        plt.close()

        return plot_dict
コード例 #18
0
ファイル: AppDashboard.py プロジェクト: Ahtidevin/NEXT-1
    def compute_duration_multiline_plot(self, app, butler, task):
        """
        Description: Returns multiline plot where there is a one-to-one mapping lines to
        algorithms and each line indicates the durations to complete the task (wrt to the api call)

        Expected input:
          (string) task :  must be in {'getQuery','processAnswer','predict'}

        Expected output (in dict):
          (dict) MPLD3 plot dictionary
        """

        alg_list = butler.experiment.get(key='args')['alg_list']
        # Builtin float is used instead of the numpy.float alias, which newer
        # NumPy releases no longer provide.
        x_min = float('inf')
        x_max = -float('inf')
        y_min = float('inf')
        y_max = -float('inf')
        list_of_alg_dicts = []

        for algorithm in alg_list:
            alg_label = algorithm['alg_label']
            list_of_log_dict = butler.ell.get_logs_with_filter(
                app.app_id + ':ALG-DURATION', {
                    'exp_uid': app.exp_uid,
                    'alg_label': alg_label,
                    'task': task
                })
            list_of_log_dict = sorted(
                list_of_log_dict,
                key=lambda item: utils.str2datetime(item['timestamp']))

            # x is the 1-based position of the call in time order,
            # y the total duration (app_duration + duration_enqueued)
            x = []
            y = []
            k = 0
            for item in list_of_log_dict:
                k += 1
                x.append(k)
                y.append(
                    item.get('app_duration', 0.) +
                    item.get('duration_enqueued', 0.))

            x = numpy.array(x)
            y = numpy.array(y)

            # Subsample: evenly spaced calls plus the slowest calls. Floor
            # division keeps the evenly spaced positions integral so they
            # remain valid array indices.
            num_items = len(list_of_log_dict)
            multiplier = min(num_items, MAX_SAMPLES_PER_PLOT)
            incr_inds = [r * num_items // multiplier for r in range(multiplier)]
            max_inds = list(numpy.argsort(-y)[0:multiplier])
            final_inds = sorted(set(incr_inds + max_inds))
            x = list(x[final_inds])
            y = list(y[final_inds])

            alg_dict = {}
            alg_dict['legend_label'] = alg_label
            alg_dict['x'] = x
            alg_dict['y'] = y
            # An algorithm without duration logs contributes nothing to the
            # axis limits (min/max of an empty list would raise).
            if x:
                x_min = min(x_min, min(x))
                x_max = max(x_max, max(x))
                y_min = min(y_min, min(y))
                y_max = max(y_max, max(y))

            list_of_alg_dicts.append(alg_dict)

        fig, ax = plt.subplots(subplot_kw=dict(axisbg='#EEEEEE'))
        for alg_dict in list_of_alg_dicts:
            ax.plot(alg_dict['x'],
                    alg_dict['y'],
                    label=alg_dict['legend_label'])
        ax.set_xlabel('API Call')
        ax.set_ylabel('Duration (s)')
        ax.set_xlim([x_min, x_max])
        ax.set_ylim([y_min, y_max])
        ax.grid(color='white', linestyle='solid')
        ax.set_title(task, size=14)
        legend = ax.legend(loc=2, ncol=3, mode="expand")
        for label in legend.get_texts():
            label.set_fontsize('small')
        plot_dict = mpld3.fig_to_dict(fig)
        plt.close()
        return plot_dict