Ejemplo n.º 1
0
def plot_selected_hp(trace, hp_name_x, hp_name_y):
    """Scatter-plot the latest probability distribution over two hyperparameters.

    Each ``(hp_id, prob)`` pair found in the last row of the ``prob`` column
    of ``trace.db.salad`` is drawn as a blue '+' located at the values of
    ``hp_name_x`` / ``hp_name_y`` for that configuration, with a marker area
    proportional to ``prob``.  The configuration chosen at the most recent
    ``argmin`` iteration is then drawn as a large green '+'.

    Parameters
    ----------
    trace : object exposing ``db.salad``, ``db.eval_info`` and ``db.argmin``.
    hp_name_x, hp_name_y : keys looked up in each configuration's ``hp_``
        entry to obtain the x and y coordinates.
    """
    (prob_rows,) = get_column_list(trace.db.salad, 'prob')

    hp_id_list, hp_dict_list = get_column_list(trace.db.eval_info, 'hp_id', 'hp_')
    hp_map = dict(zip(hp_id_list, hp_dict_list))

    # One (x, y, prob) triple per configuration of the latest distribution.
    points = [
        (hp_map[hp_id][hp_name_x], hp_map[hp_id][hp_name_y], prob)
        for hp_id, prob in prob_rows[-1]
    ]
    x, y, prob = np.array(points).T

    # The least probable point gets area 20; the others scale linearly.
    # NOTE(review): a minimum probability of 0 makes this a division by zero.
    pp.scatter(x, y, s=prob / min(prob) * 20, c='b', marker='+', linewidths=0.5)

    i, _, chosen = get_column_list(trace.db.argmin, 'i', 'argmin_list', 'chosen_hp_id')

    # Use the *position* of the largest iteration number, not its value:
    # the value is only a valid list index when iterations are stored as
    # 0..n-1 in order.
    latest = np.argmax(i)
    hp = hp_map[chosen[latest]]

    pp.scatter(hp[hp_name_x], hp[hp_name_y], s=200, marker='+', c='g', linewidths=1)
Ejemplo n.º 2
0
def plot_time(trace, axes=None):
    """Draw a stacked bar chart of the time spent per iteration.

    Three series are stacked bottom to top: 'analyse time' (red),
    'choose time' (blue) and 'learn time' (green).  They are read from the
    ``analyze``, ``candidates`` and ``eval_info`` tables of ``trace.db``.

    Parameters
    ----------
    trace : object exposing ``db.candidates``, ``db.analyze``, ``db.eval_info``.
    axes : object to draw on; defaults to ``pp.gca()`` when None.
    """
    if axes is None:
        axes = pp.gca()

    hp_id, choose_time = get_column_list(trace.db.candidates, 'hp_id', 'choose.time')
    hp_id_analyse, analyse_time = get_column_list(trace.db.analyze, 'hp_id', 'analyse_time')
    hp_id_duration, duration = get_column_list(trace.db.eval_info, 'hp_id', 'time')

    # The three tables may hold different numbers of rows; keep only as many
    # entries as the shortest one provides.
    n_common = min(len(hp_id), len(hp_id_analyse), len(hp_id_duration))
    choose_time = choose_time[:n_common]
    analyse_time = analyse_time[:n_common]
    duration = duration[:n_common]

    # Position of every hp_id in the candidates table.
    position_of = {key: pos for pos, key in enumerate(hp_id)}

    # presumably remap() reorders the values to follow the candidates order
    # -- TODO confirm against its definition.
    analyse_time = remap(position_of, hp_id_analyse, analyse_time)
    duration = remap(position_of, hp_id_duration, duration)

    x_pos = np.arange(n_common)

    axes.bar(x_pos, analyse_time, color='r', label='analyse time')
    axes.bar(x_pos, choose_time, bottom=analyse_time, color='b', label='choose time')
    axes.bar(x_pos, duration, bottom=analyse_time + choose_time, color='g', label='learn time')

    axes.set_xlabel('iteration')
    axes.set_ylabel('time (s)')
    axes.legend(loc='best')
    axes.set_title('time per iteration for different components')
Ejemplo n.º 3
0
def plot_selected_hp_trace(trace):
    """Plot how the probability of each candidate evolves across iterations.

    A (candidate x iteration) matrix is filled from the ``prob`` column of
    ``trace.db.salad`` and shown as a binary-colormap image.  On top of it,
    for every row of ``trace.db.argmin``, all entries of ``argmin_list`` are
    drawn as small translucent blue dots and ``chosen_hp_id`` as a red ring.

    Parameters
    ----------
    trace : object exposing ``db.salad``, ``db.eval_info`` and ``db.argmin``.
    """
    (prob_history,) = get_column_list(trace.db.salad, 'prob')
    (hp_id_list,) = get_column_list(trace.db.eval_info, 'hp_id')

    # Image row assigned to each hp_id (its position in eval_info).
    row_of = {hp_id: row for row, hp_id in enumerate(hp_id_list)}

    prob_mat = np.zeros((len(hp_id_list), len(prob_history)))
    for col, prob_list in enumerate(prob_history):
        for hp_id, p in prob_list:
            prob_mat[row_of[hp_id], col] = p

    pp.imshow(prob_mat, origin='lower', aspect='auto', cmap='binary',
              interpolation='nearest')

    iterations, argmin_lists, chosen_ids = get_column_list(
        trace.db.argmin, 'i', 'argmin_list', 'chosen_hp_id')

    for it, candidates, chosen_hp_id in zip(iterations, argmin_lists, chosen_ids):
        rows = np.array([row_of[hp_id] for hp_id in candidates])

        # every candidate considered at this iteration
        pp.scatter([it] * len(rows), rows, 2, color='blue', alpha=0.2)
        # the candidate that was actually chosen
        pp.scatter(it, row_of[chosen_hp_id], 10, facecolors='none', edgecolors='r')

    pp.xlabel('iteration')
    pp.ylabel('candidate')
    pp.title('The chosen candidate for each iteration')
Ejemplo n.º 4
0
def load_eval_info(trace_path):
    """Build the test and validation ``EvalInfo`` objects stored in a trace.

    Opens the trace at ``trace_path`` and reads the ``tst.y`` / ``val.y``
    columns of ``eval_info``, the ``tst`` / ``val`` columns of ``y``, and the
    metric, dataset name and hyperparameter space of ``main``.

    Returns
    -------
    (tst_eval_info, val_eval_info) : pair of ``EvalInfo``, test set first.
    """
    trace = pkl_trace.TraceDBFile(trace_path)
    fetch = pkl_trace.get_column_list

    y_tst_N, y_val_N = fetch(trace.db.eval_info, "tst.y", "val.y")
    y_tst_m, y_val_m = fetch(trace.db.y, "tst", "val")
    metric, ds_name, hp_space = fetch(trace.db.main, "__dict__.metric", "ds_name", "hp_space")

    def build(y_m, y_N):
        # Only the first row of the `y` and `main` columns is used; the
        # eval_info column is passed along whole.
        return EvalInfo(y_m[0], y_N, metric[0], ds_name[0], hp_space[0].name)

    tst_eval_info = build(y_tst_m, y_tst_N)
    val_eval_info = build(y_val_m, y_val_N)
    return tst_eval_info, val_eval_info
Ejemplo n.º 5
0
def sign_test_over_time(trace_list,
                        key_A='salad_risk.tst',
                        key_B='argmin_risk.tst'):
    """Count, position by position, how often column A beats column B.

    For each trace the ``key_A`` and ``key_B`` columns of ``trace.db.analyze``
    are compared element-wise.  A position where A < B counts as a win, one
    where A > B as a loss; ties count as neither.

    Traces may have different lengths: the counters grow to the longest
    trace seen, and shorter columns are zero-padded, so the padded positions
    compare equal and do not affect either counter.

    Parameters
    ----------
    trace_list : iterable of traces exposing ``db.analyze``.
    key_A, key_B : names of the two columns to compare.

    Returns
    -------
    (wins, wins + lose) : two float arrays -- the number of wins and the
        number of non-tied comparisons at each position.
    """
    wins = np.zeros(1)
    lose = np.zeros(1)

    for trace in trace_list:
        a, b = get_column_list(trace.db.analyze, key_A, key_B)
        a = np.array(a)
        b = np.array(b)

        n = max(len(a), len(wins))
        if n > len(wins):
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print('resizing %s' % (wins.shape,))
            # In-place ndarray.resize pads the new tail with zeros.
            wins.resize(n)
            lose.resize(n)
        elif n > len(a):
            a.resize(n)
            b.resize(n)

        print(' '.join(str(arr.shape) for arr in (wins, lose, a, b)))

        wins[a < b] += 1.
        lose[a > b] += 1

    return wins, wins + lose
def sign_test_over_time(trace_list, key_A='salad_risk.tst', key_B='argmin_risk.tst'):
    """Accumulate per-position win/loss counts of column A against column B.

    The ``key_A`` and ``key_B`` columns of every ``trace.db.analyze`` are
    compared element by element: A < B is a win, A > B is a loss, and equal
    values are ignored.

    The counters are enlarged to the longest trace encountered; columns of
    shorter traces are zero-padded, which yields ties at the padded
    positions and therefore leaves both counters untouched there.

    Parameters
    ----------
    trace_list : iterable of traces exposing ``db.analyze``.
    key_A, key_B : names of the two columns to compare.

    Returns
    -------
    (wins, wins + lose) : float arrays holding, for each position, the number
        of wins and the number of non-tied comparisons.
    """
    wins = np.zeros(1)
    lose = np.zeros(1)

    for trace in trace_list:
        a, b = get_column_list(trace.db.analyze, key_A, key_B)
        a = np.array(a)
        b = np.array(b)

        n = max(len(a), len(wins))
        if n > len(wins):
            # print(...) with one argument behaves identically on Python 2
            # and Python 3.
            print('resizing %s' % (wins.shape,))
            # ndarray.resize works in place and zero-fills the added entries.
            wins.resize(n)
            lose.resize(n)
        elif n > len(a):
            a.resize(n)
            b.resize(n)

        print(' '.join(str(arr.shape) for arr in (wins, lose, a, b)))

        wins[a < b] += 1.
        lose[a > b] += 1

    return wins, wins + lose
Ejemplo n.º 7
0
def load_eval_info(trace_path):
    """Return the test and validation ``EvalInfo`` objects of a trace file.

    The trace at ``trace_path`` is opened and three tables are read:
    ``eval_info`` (columns ``tst.y`` / ``val.y``), ``y`` (columns ``tst`` /
    ``val``) and ``main`` (metric, dataset name, hyperparameter space).

    Returns
    -------
    (tst_eval_info, val_eval_info) : pair of ``EvalInfo``, test set first.
    """
    trace = pkl_trace.TraceDBFile(trace_path)
    read_columns = pkl_trace.get_column_list

    y_tst_N, y_val_N = read_columns(trace.db.eval_info, 'tst.y', 'val.y')
    y_tst_m, y_val_m = read_columns(trace.db.y, 'tst', 'val')
    metric, ds_name, hp_space = read_columns(
        trace.db.main, '__dict__.metric', 'ds_name', 'hp_space')

    def make_info(y_m, y_N):
        # The first row of the `y` and `main` columns is used; the eval_info
        # column is forwarded in full.
        return EvalInfo(y_m[0], y_N, metric[0], ds_name[0], hp_space[0].name)

    tst_eval_info = make_info(y_tst_m, y_tst_N)
    val_eval_info = make_info(y_val_m, y_val_N)
    return tst_eval_info, val_eval_info
Ejemplo n.º 8
0
    def __init__(self, trace):
        """Cache the hyperparameter space and candidate grid found in ``trace``.

        Attributes set:
          hp_space      -- first entry of the ``hp_space`` column of ``db.main``
          trace         -- the trace itself
          hp_id_list    -- ``hp_id`` column of ``db.candidates``
          chooser_state -- ``chooser_state`` of the row with the largest ``i``
          hp_id_map     -- ``make_map(hp_id_list)``
          unit_grid     -- ``unit_value`` column as a numpy array
          col_list      -- one numpy array per hyperparameter, holding its
                           value for every row of ``unit_grid``
          hp_keys       -- hyperparameter keys taken from the last row processed
          hp_key_map    -- ``make_map(hp_keys)``
        """
        self.hp_space = get_column_list(trace.db.main, 'hp_space')[0][0]
        self.trace = trace

        candidate_cols = get_column_list(
            trace.db.candidates, 'hp_id', 'unit_value', 'chooser_state', 'i')
        self.hp_id_list, unit_values, chooser_states, iterations = candidate_cols

        # State recorded at the highest iteration number.
        self.chooser_state = chooser_states[np.argmax(iterations)]
        self.hp_id_map = make_map(self.hp_id_list)

        self.unit_grid = np.array(unit_values)

        # Convert every unit-space row to its hyperparameter values.
        value_rows = []
        for unit_row in self.unit_grid:
            key_value_pairs = HpConfiguration(self.hp_space, unit_row).var_list()
            hp_keys, values = zip(*key_value_pairs)
            value_rows.append(values)
        # Transpose rows into one array per hyperparameter.
        self.col_list = [np.array(column) for column in zip(*value_rows)]

        self.hp_keys = hp_keys
        self.hp_key_map = make_map(hp_keys)
Ejemplo n.º 9
0
    def __init__(self, trace):
        """Load the hyperparameter space and the candidate grid from ``trace``.

        Attributes set:
          hp_space      -- first entry of the ``hp_space`` column of ``db.main``
          trace         -- the trace itself
          hp_id_list    -- ``hp_id`` column of ``db.candidates``
          chooser_state -- ``chooser_state`` of the row with the largest ``i``
          hp_id_map     -- ``make_map(hp_id_list)``
          unit_grid     -- ``unit_value`` column as a numpy array
          col_list      -- per-hyperparameter numpy arrays of the values taken
                           over all rows of ``unit_grid``
          hp_keys       -- hyperparameter keys taken from the last row processed
          hp_key_map    -- ``make_map(hp_keys)``
        """
        self.hp_space = get_column_list(trace.db.main, 'hp_space')[0][0]
        self.trace = trace

        columns = get_column_list(
            trace.db.candidates, 'hp_id', 'unit_value', 'chooser_state', 'i')
        self.hp_id_list, unit_list, state_list, iter_list = columns

        # Keep the state saved at the highest iteration number.
        newest = np.argmax(iter_list)
        self.chooser_state = state_list[newest]
        self.hp_id_map = make_map(self.hp_id_list)

        self.unit_grid = np.array(unit_list)

        # Map each unit-space row to its tuple of hyperparameter values.
        rows = []
        for unit_value in self.unit_grid:
            variables = HpConfiguration(self.hp_space, unit_value).var_list()
            hp_keys, row_values = zip(*variables)
            rows.append(row_values)
        # Transpose: one array per hyperparameter.
        self.col_list = [np.array(col) for col in zip(*rows)]

        self.hp_keys = hp_keys
        self.hp_key_map = make_map(hp_keys)
Ejemplo n.º 10
0
def plot_selected_hp(trace, hp_name_x, hp_name_y):
    """Plot the latest probability distribution in a 2-D hyperparameter plane.

    The last row of the ``prob`` column of ``trace.db.salad`` is a sequence
    of ``(hp_id, prob)`` pairs.  Each pair is drawn as a blue '+' at the
    ``hp_name_x`` / ``hp_name_y`` values of that configuration, with a marker
    area proportional to ``prob``.  The configuration chosen at the most
    recent ``argmin`` iteration is overlaid as a large green '+'.

    Parameters
    ----------
    trace : object exposing ``db.salad``, ``db.eval_info`` and ``db.argmin``.
    hp_name_x, hp_name_y : keys looked up in each configuration's ``hp_``
        entry to obtain the x and y coordinates.
    """
    prob_dict, = get_column_list(trace.db.salad, 'prob')

    hp_id_list, hp_dict_list = get_column_list(trace.db.eval_info, 'hp_id',
                                               'hp_')
    hp_map = dict(zip(hp_id_list, hp_dict_list))

    point_list = []
    for hp_id, prob in prob_dict[-1]:
        hp_dict = hp_map[hp_id]
        point_list.append((hp_dict[hp_name_x], hp_dict[hp_name_y], prob))

    x, y, prob = np.array(point_list).T

    # Marker area is relative to the least probable point (area 20).
    # NOTE(review): if the smallest probability is 0 this divides by zero.
    pp.scatter(x,
               y,
               s=prob / min(prob) * 20,
               c='b',
               marker='+',
               linewidths=0.5)

    i, _, chosen = get_column_list(trace.db.argmin, 'i', 'argmin_list',
                                   'chosen_hp_id')

    # Index by the position of the largest iteration number; its value is
    # only a valid index when the rows are stored as 0..n-1 in order.
    idx = np.argmax(i)
    hp = hp_map[chosen[idx]]

    pp.scatter(hp[hp_name_x],
               hp[hp_name_y],
               s=200,
               marker='+',
               c='g',
               linewidths=1)
Ejemplo n.º 11
0
def plot_time(trace, axes=None):
    """Show, as stacked bars, where the time of each iteration was spent.

    From bottom to top the bars are 'analyse time' (red), 'choose time'
    (blue) and 'learn time' (green), read respectively from the ``analyze``,
    ``candidates`` and ``eval_info`` tables of ``trace.db``.

    Parameters
    ----------
    trace : object exposing ``db.candidates``, ``db.analyze``, ``db.eval_info``.
    axes : object to draw on; ``pp.gca()`` is used when None.
    """
    if axes is None:
        axes = pp.gca()

    hp_id, choose_time = get_column_list(
        trace.db.candidates, 'hp_id', 'choose.time')
    hp_id_analyse, analyse_time = get_column_list(
        trace.db.analyze, 'hp_id', 'analyse_time')
    hp_id_duration, duration = get_column_list(
        trace.db.eval_info, 'hp_id', 'time')

    # Truncate every series to the row count of the shortest table.
    shortest = min(len(hp_id), len(hp_id_analyse), len(hp_id_duration))
    choose_time = choose_time[:shortest]
    analyse_time = analyse_time[:shortest]
    duration = duration[:shortest]

    # hp_id -> position in the candidates table.
    index_of = {key: pos for pos, key in enumerate(hp_id)}

    # presumably remap() aligns the values with the candidates ordering
    # -- TODO confirm against its definition.
    analyse_time = remap(index_of, hp_id_analyse, analyse_time)
    duration = remap(index_of, hp_id_duration, duration)

    iteration = np.arange(shortest)

    axes.bar(iteration, analyse_time, color='r', label='analyse time')
    axes.bar(iteration, choose_time, bottom=analyse_time,
             color='b', label='choose time')
    axes.bar(iteration, duration, bottom=analyse_time + choose_time,
             color='g', label='learn time')

    axes.set_xlabel('iteration')
    axes.set_ylabel('time (s)')
    axes.legend(loc='best')
    axes.set_title('time per iteration for different components')
Ejemplo n.º 12
0
def plot_selected_hp_trace(trace):
    """Display the per-iteration probability of every candidate.

    The ``prob`` column of ``trace.db.salad`` is unpacked into a
    (candidate x iteration) matrix and rendered as a binary-colormap image.
    For each row of ``trace.db.argmin`` the entries of ``argmin_list`` are
    overlaid as small translucent blue dots and ``chosen_hp_id`` as a red ring.

    Parameters
    ----------
    trace : object exposing ``db.salad``, ``db.eval_info`` and ``db.argmin``.
    """
    (prob_columns,) = get_column_list(trace.db.salad, 'prob')
    (hp_id_list,) = get_column_list(trace.db.eval_info, 'hp_id')

    # hp_id -> image row (its position in eval_info).
    hp_row = {hp_id: pos for pos, hp_id in enumerate(hp_id_list)}

    prob_mat = np.zeros((len(hp_id_list), len(prob_columns)))
    for col, pairs in enumerate(prob_columns):
        for hp_id, p in pairs:
            prob_mat[hp_row[hp_id], col] = p

    pp.imshow(prob_mat,
              origin='lower',
              aspect='auto',
              cmap='binary',
              interpolation='nearest')

    iter_col, argmin_col, chosen_col = get_column_list(
        trace.db.argmin, 'i', 'argmin_list', 'chosen_hp_id')

    for step, argmin_list, chosen_hp_id in zip(iter_col, argmin_col, chosen_col):
        candidate_rows = np.array([hp_row[hp_id] for hp_id in argmin_list])

        # all candidates considered at this iteration
        pp.scatter([step] * len(candidate_rows), candidate_rows, 2,
                   color='blue', alpha=0.2)
        # the one that was chosen
        pp.scatter(step, hp_row[chosen_hp_id], 10,
                   facecolors='none', edgecolors='r')

    pp.xlabel('iteration')
    pp.ylabel('candidate')
    pp.title('The chosen candidate for each iteration')
Ejemplo n.º 13
0
def plot_eval_info(plot, hp_info, y_keys, perm=None):
    """Feed the evaluated configurations and their results to ``plot``.

    Reads the ``y_keys`` columns of ``hp_info.trace.db.eval_info``, adds the
    most recent distribution of ``db.predict`` under the key
    'AB probability', and hands everything to ``plot.set_info`` together
    with a ``MyGP`` configured from ``hp_info.chooser_state``.

    Parameters
    ----------
    plot : object providing ``set_info``.
    hp_info : object providing ``trace``, ``map_hp_id_list``, ``chooser_state``,
        ``unit_grid``, ``hp_keys`` and ``hp_space``.
    y_keys : names of the ``eval_info`` columns to fetch.
    perm : optional sequence of column indices used to reorder the
        hyperparameters (both the columns of X and their names).

    Returns
    -------
    None.  Prints 'no results yet' and returns early when no configuration
    has been evaluated.
    """
    y_dict = get_column_dict(hp_info.trace.db.eval_info, 'hp_id', *y_keys)

    idx = hp_info.map_hp_id_list(y_dict.pop('hp_id'))

    # Bail out before touching the prediction table: np.argmax raises on an
    # empty sequence, so running it first could hide this message behind an
    # exception when the trace holds no results.
    if len(idx) == 0:
        print('no results yet')
        return

    # add the agnostic Bayes distribution to the list of traces
    idx_list, distr_list = get_column_list(hp_info.trace.db.predict, 'i',
                                           'prob')
    # extract the last computed distribution
    distr = distr_list[np.argmax(idx_list)]
    y_dict['AB probability'] = unpack_prob(distr, hp_info, len(idx))

    gp = MyGP(mcmc_iters=0, noiseless=False)
    gp.set_hypers(hp_info.chooser_state)

    for key in y_keys:
        y_dict[key] = np.array(y_dict[key])

    X = hp_info.unit_grid[idx, :]

    hp_keys = hp_info.hp_keys
    print(hp_keys)
    if perm is not None:
        X = X[:, perm]
        hp_keys = [hp_keys[i] for i in perm]

    hp_keys = [clean_hp_name(hp_key) for hp_key in hp_keys]
    print(hp_keys)
    # NOTE(review): 'val.risk' is hard-coded; presumably it must be one of
    # y_keys -- confirm against plot.set_info.
    plot.set_info(X, y_dict, 'val.risk', hp_keys, hp_info.hp_space.var_list,
                  gp)
Ejemplo n.º 14
0
def plot_eval_info(plot, hp_info, y_keys, perm=None):
    """Pass the evaluated configurations and their results on to ``plot``.

    The ``y_keys`` columns of ``hp_info.trace.db.eval_info`` are collected,
    the latest distribution of ``db.predict`` is added under the key
    'AB probability', and the lot is given to ``plot.set_info`` along with a
    ``MyGP`` whose hyperparameters come from ``hp_info.chooser_state``.

    Parameters
    ----------
    plot : object providing ``set_info``.
    hp_info : object providing ``trace``, ``map_hp_id_list``, ``chooser_state``,
        ``unit_grid``, ``hp_keys`` and ``hp_space``.
    y_keys : names of the ``eval_info`` columns to fetch.
    perm : optional sequence of column indices used to reorder the
        hyperparameters (both the columns of X and their names).

    Returns
    -------
    None.  Prints 'no results yet' and returns early when no configuration
    has been evaluated.
    """
    y_dict = get_column_dict(hp_info.trace.db.eval_info, 'hp_id', *y_keys)

    idx = hp_info.map_hp_id_list(y_dict.pop('hp_id'))

    # Check for an empty trace first: np.argmax raises on an empty sequence,
    # so running it before this guard could raise instead of printing the
    # message.
    if len(idx) == 0:
        print('no results yet')
        return

    # add the agnostic Bayes distribution to the list of traces
    idx_list, distr_list = get_column_list(hp_info.trace.db.predict, 'i', 'prob')
    distr = distr_list[np.argmax(idx_list)]  # extract the last computed distribution
    y_dict['AB probability'] = unpack_prob(distr, hp_info, len(idx))

    gp = MyGP(mcmc_iters=0, noiseless=False)
    gp.set_hypers(hp_info.chooser_state)

    for key in y_keys:
        y_dict[key] = np.array(y_dict[key])

    X = hp_info.unit_grid[idx, :]

    hp_keys = hp_info.hp_keys
    print(hp_keys)
    if perm is not None:
        X = X[:, perm]
        hp_keys = [hp_keys[i] for i in perm]

    hp_keys = [clean_hp_name(hp_key) for hp_key in hp_keys]
    print(hp_keys)
    # NOTE(review): 'val.risk' is hard-coded; presumably it must be one of
    # y_keys -- confirm against plot.set_info.
    plot.set_info(X, y_dict, 'val.risk', hp_keys, hp_info.hp_space.var_list, gp)