def check_positive():
    """Show a grayscale image of where ``f`` is positive, then exit.

    ``f`` is a free name here (it is not defined inside this function).
    It is evaluated at every point of ``np.arange(-10, 3, 0.1)``; the
    boolean result of ``> 0`` is transposed and passed to ``plt.imshow``.
    ``exit()`` is called once ``plt.show()`` returns.
    """
    grid = np.arange(-10, 3, 0.1)
    values = np.array([f(point) for point in grid])
    positive_mask = np.transpose(values > 0)

    plt.imshow(positive_mask, cmap='gray')
    plt_tools.set_postion()
    plt.show()
    exit()
def plot():
    """Plot the function built by ``f_setting(0.9, 0.01)`` and exit.

    The function is sampled on ``np.arange(-10, 3, 0.1)`` and drawn as a
    blue line. Sample points whose value compares equal to ``None`` are
    marked with red dots at ``y = 0``. The y-axis is limited to
    ``[-0.1, 0.2]``; ``exit()`` is called once ``plt.show()`` returns.
    """
    curve = f_setting(0.9, 0.01)
    xs = np.arange(-10, 3, 0.1)
    ys = np.array([curve(x) for x in xs])

    plt.plot(xs, ys, color='blue')

    # Deliberately ``== None`` (not ``is None``): the comparison is applied
    # to the array, so it selects the sample points that produced None.
    missing = ys == None  # noqa: E711
    xs_missing = xs[missing]
    plt.scatter(xs_missing, np.zeros_like(xs_missing), color='red')

    plt.ylim([-0.1, 0.2])
    plt_tools.set_postion()
    plt.show()
    exit()
def prepare_data(dim, delta, sharelogsigma, clipcontroltype, cliprange, clip_clipratio, search_delta=False):
    """Compute (or load cached) KL2Clip ratio bounds on an action grid and plot them.

    For ``dim == 1`` the function builds a ``KL2Clip`` optimiser, evaluates it
    on 2048 actions evenly spaced in ``[-5, 5]`` with ``logsigma0 = 0``, caches
    the result as ``<prefix>.pkl`` under
    ``path_root + '/KL2Clip/data/train_lambda/dim=..., delta=..., train'`` and
    then scatter-plots the ``max``/``min`` ratios, coloured by whether the
    constraint value is within ``get_ConstraintThreshold(ress_tf.delta)``.

    Args:
        dim: Action dimensionality. Only ``1`` is accepted; any other value
            raises ``NotImplementedError``.
        delta: Passed to the optimiser call and used in the cache directory name.
        sharelogsigma: Forwarded to the ``KL2Clip`` constructor.
        clipcontroltype: Forwarded to the ``KL2Clip`` constructor (and to
            ``get_clip_new`` inside the unused ``plot_new`` helper).
        cliprange: Forwarded to the ``KL2Clip`` constructor; also compared with
            the module-level ``clipranges`` to decide when to create / show
            the figure.
        clip_clipratio: Forwarded to the optimiser call.
        search_delta: When true, the loaded result is appended to ``results``
            and the loop stops before any plotting.

    Returns:
        None. NOTE(review): ``results`` is filled when ``search_delta`` is
        true but no ``return`` statement is visible in this block.

    Raises:
        NotImplementedError: If ``dim != 1``.

    Reads the module-level names ``path_root``, ``clipranges`` and ``DEBUG``.

    NOTE(review): the source this block was recovered from had its
    indentation collapsed; the nesting below (in particular the placement of
    ``opt.close()`` and ``plt.close()``) is a reconstruction -- confirm
    against the original file.
    """
    global ress_tf_last  # NOTE(review): declared but never used in this block
    path_data = path_root + '/KL2Clip/data/train_lambda'
    Name = f'dim={dim}, delta={delta}, train'
    path_data_processed = path_data + f'/{Name}'
    tools.mkdir(path_data_processed)
    if dim == 1:
        logsigma0s = np.array([0])
    else:
        raise NotImplementedError
    # One row per logsigma0 setting, ``dim`` columns each.
    logsigma0s = logsigma0s.reshape((-1, dim))
    batch_size = 2048
    mu = np.zeros((dim, ))
    opt = KL2Clip(dim=dim, batch_size=batch_size, sharelogsigma=sharelogsigma,
                  clipcontroltype=clipcontroltype, cliprange=cliprange)

    def get_fn_sample():
        """Build two callables over a ``DiagGaussianPd``: a sampler and ``dist.p(a)``."""
        mu0 = tf.placeholder(shape=[dim], dtype=tf.float32)
        a = tf.placeholder(shape=[batch_size, dim], dtype=tf.float32)
        logsigma0 = tf.placeholder(shape=[dim], dtype=tf.float32)
        sample_size = tf.placeholder(shape=(), dtype=tf.int32)
        # The distribution is parameterised by the concatenation (mu0, logsigma0).
        dist = DiagGaussianPd(tf.concat((mu0, logsigma0), axis=0))
        samples = dist.sample(sample_size)
        fn_sample = U.function([mu0, logsigma0, sample_size], samples)
        fn_p = U.function([mu0, logsigma0, a], dist.p(a))
        return fn_sample, fn_p

    # The session object itself is not referenced again; it is created with
    # make_default=True.
    sess = U.make_session(make_default=True)
    results = []
    fn_sample, fn_p = get_fn_sample()
    for logsigma0 in logsigma0s:
        prefix_save = f'{path_data_processed}/logsigma0={logsigma0}'
        Name_f = f"{Name},logsigma0={logsigma0}"
        file_fig = f'{prefix_save}.png'
        # A fixed, evenly spaced action grid is used instead of sampling:
        # a_s_batch = fn_sample( mu, logsigma0, batch_size )
        a_s_batch = np.linspace(-5, 5, batch_size).reshape((-1, 1))
        logsigma0s_batch = np.tile(logsigma0, (batch_size, 1))
        print(a_s_batch.max(), a_s_batch.min())
        # --- sort the data: have problem in 2-dim
        # inds = np.argsort(a_s_batch, axis=0)
        # inds = inds.reshape(-1)
        # a_s_batch = a_s_batch[inds]
        # logsigma0s_batch = logsigma0s_batch[inds]
        # tools.reset_time()
        # a_s_batch.fill(0)
        # print(a_s_batch.shape)
        # a_s_batch[0, :]=0
        # if search_delta:
        #     for i in range( batch_size):
        #         a_s_batch[i,:] = 0.001 * (batch_size-i)

        # Run the optimiser only when no cached result exists for this prefix.
        if not os.path.exists(f'{prefix_save}.pkl'):
            # ress_tf = opt( mu0_logsigma0_tuple=(a_s_batch, logsigma0s_batch),
            #                a=None, delta=delta, clip_clipratio=clip_clipratio)
            ress_tf = opt(mu0_logsigma0_tuple=(np.zeros_like(logsigma0s_batch), logsigma0s_batch),
                          a=a_s_batch, delta=delta, clip_clipratio=clip_clipratio)
            print(a_s_batch[0], ress_tf.x.max[0], ress_tf.x.min[0])
            save_vars(f'{prefix_save}.pkl', a_s_batch, logsigma0, logsigma0s_batch, ress_tf)
        print(prefix_save)
        # Always read back from the cache so both paths use the same data.
        a_s_batch, logsigma0, logsigma0s_batch, ress_tf = load_vars(
            f'{prefix_save}.pkl')
        if search_delta:
            results.append(ress_tf)
            break
        # A new figure is created only for the first entry of ``clipranges``.
        if cliprange == clipranges[0]:  # TODO tmp
            fig = plt.figure(figsize=(20, 10))
        markers = ['^', '.']
        # colors[ind] = [colour for "good" points, colour for "bad" points]
        colors = [['blue', 'red'], ['green', 'hotpink']]
        # for ind, opt_name in enumerate(['max']):
        for ind, opt_name in enumerate(['max', 'min']):
            # if ind == 1:
            #     continue
            # --- plot tensorflow result
            ratios, cons = ress_tf.ratio[opt_name], ress_tf.con[opt_name]
            print(
                f'clip-{opt_name}_mean:{ratios.mean()}, clip-{opt_name}_min:{ratios.min()}, clip-{opt_name}_max:{ratios.max()}'
            )
            if search_delta:
                continue
            if DEBUG:
                pass
            # "good" = constraint value within the threshold derived from delta.
            inds_good = cons <= get_ConstraintThreshold(ress_tf.delta)
            inds_bad = np.logical_not(inds_good)
            if dim == 1:
                # ``and 1`` is an always-true manual switch.
                if ind == 0 and 1:
                    ps = fn_p(mu, logsigma0, a_s_batch)  # +np.abs(ps.max()) + 1
                    # ratio_new is computed and alpha printed, but ratio_new
                    # is not plotted (the scatter calls are commented out).
                    ratio_new = -np.log(ps)
                    ratio_new = ratio_new - ratio_new.min() + ratios.min()
                    alpha = np.exp(-ps * 2)
                    print(alpha)
                    # plt.scatter(a_s_batch, ratio_new, s=5, label='ratio_new0')
                    ratio_new = ratio_new.min() + alpha * (ratio_new - ratio_new.min())
                    # plt.scatter( a_s_batch, ratio_new, s=5, label='ratio_new1' )
                    # ps = -ps
                    # ratios = ps - ps.min() + ratios.min()
                    # print( ps.min() )
                    # ratios_new =np.square( a_s_batch-mu ) * np.exp( -logsigma0 )
                    # ratio_min = ps / (ps.max()-ps.min()) * ress_tf.ratio.min.max()
                    # plt.scatter(a_s_batch, ratio_min, s=5, label='square')
                    # plt.scatter(a_s_batch, 1./ratio_min, s=5, label='square')
                    # plt.scatter(a_s_batch, 1./ratios, s=5, label='1/max')

                # Helper kept for experimentation; every call to it below is
                # commented out.
                def plot_new(alpha):
                    clip_max_new, clip_min_new = get_clip_new(
                        alpha, ress_tf.ratio['max'], ress_tf.ratio['min'],
                        clipcontroltype=clipcontroltype)
                    plt.scatter(a_s_batch, clip_max_new, s=5, label=f'clip_max_{alpha}')
                    plt.scatter(a_s_batch, clip_min_new, s=5, label=f'clip_min_{alpha}')

                if ind == 0:
                    pass
                    # plot_new(0.5)
                    # plot_new(0.5)
                    # plot_new(-1)
                plt.scatter(a_s_batch[inds_good], ratios[inds_good],
                            label='ratio_predict-good_' + opt_name, s=5,
                            color=colors[ind][0], marker=markers[ind])
                plt.scatter(a_s_batch[inds_bad], ratios[inds_bad],
                            label='ratio_predict-bad_' + opt_name, s=5,
                            color=colors[ind][1], marker=markers[ind])
            elif dim == 2:
                # Not reachable as written: dim != 1 raises NotImplementedError above.
                ax = fig.gca(projection='3d')
                # ax.view_init(30, 30)
                ax.view_init(90, 90)
                # ax.plot_trisurf(a_s_batch[:, 0], a_s_batch[:, 1], ratios)
                ax.scatter(a_s_batch[inds_good, 0], a_s_batch[inds_good, 1], ratios[inds_good],
                           label='ratio_predict-good_' + opt_name, s=5,
                           color=colors[ind][0], marker=markers[ind])
                ax.scatter(a_s_batch[inds_bad, 0], a_s_batch[inds_bad, 1], ratios[inds_bad],
                           label='ratio_predict-bad_' + opt_name, s=5,
                           color=colors[ind][1], marker=markers[ind])
        if dim <= 2 and not search_delta:
            plt.title(
                Name_f +
                f'\nstep:{ress_tf.step},rate_satisfycon:{ress_tf.rate_satisfycon_}, rate_statisfydifference_:{ress_tf.rate_statisfydifference_}, difference_max_:{ress_tf.difference_max_}'
            )
            plt.legend(loc='best')
            # The figure is written to disk only outside DEBUG mode.
            if not DEBUG:
                plt.savefig(file_fig)
    # NOTE(review): placed after the loop in this reconstruction -- confirm.
    opt.close()
    if dim <= 2 and not search_delta:
        if DEBUG:
            # In DEBUG mode the figure is shown only for the last cliprange.
            if cliprange == clipranges[-1]:
                plt_tools.set_postion()
                plt.show()
                # NOTE(review): nesting level of this close() is ambiguous in
                # the recovered source -- confirm.
                plt.close()
ratio_mins = [] for pa in pas: ratio_pre = ratio = opt_entity(pa, delta, 'min', ratio_pre) ratio_mins.append(ratio) ratio_maxs = np.array(ratio_maxs) ratio_mins = np.array(ratio_mins) return DotMap(ratio=DotMap(max=ratio_maxs, min=ratio_mins)) if __name__ == '__main__': delta = 0.01 pas = np.arange(0.01, 1., 0.001) result = opt(pas, delta) plt.plot(pas, result.ratio.max, 'blue') plt.plot(pas, result.ratio.min, 'green') plt_tools.set_postion() plt.show() exit() def plot(): pa = 0.9 delta = 0.01 f = f_setting(pa, delta) ms = np.arange(-10, 3, 0.1) fs = np.array([f(m) for m in ms]) plt.plot(ms, fs, color='blue') plt.scatter(ms[fs == None], np.zeros_like(ms[fs == None]), color='red') plt.ylim([-0.1, 0.2]) plt_tools.set_postion() plt.show()
def tes_3d_data():
    """Compare cached optimiser ratios with freshly solved ones in a 3-D scatter.

    Every sub-directory of ``<path_root>/data/train`` is scanned for a pickle
    (the first directory entry if it ends with ``pkl``, otherwise the second).
    For each pickle the stored actions are sorted, ``KL2Clip`` is re-run on
    them, and the stored and recomputed ``ratio.max`` arrays are collected
    together with ``(action, delta)`` pairs. The collected data is written to
    and re-read from ``aa.pkl``, thinned to every 30th row, and drawn as two
    point clouds (black: stored result, red: recomputed result).

    Sub-directories that cannot be listed, or that have too few entries to
    pick a file from, are skipped.

    Returns:
        None. The function ends by showing the Matplotlib figure.
    """
    if tools.ispc('xiaoming'):
        path_root = '/media/root/新加卷/KL2Clip'
    else:
        path_root = ''
    import plt_tools
    from baselines.common.tools import load_vars, save_vars
    import matplotlib.pyplot as plt

    def _subsample(chunks):
        """Keep every 30th row of each array in *chunks*."""
        return [chunk[0::30] for chunk in chunks]

    dim = 1
    path_data = f'{path_root}/data/train'

    # --- collect one pickle path per sub-directory -------------------------
    files = []
    for dir_name in sorted(os.listdir(path_data)):
        dir_pickle = os.path.join(path_data, dir_name)
        try:
            # List once so the "is it a pkl?" test and the selection see the
            # same ordering.
            entries = os.listdir(dir_pickle)
            file_name = entries[0] if entries[0].endswith('pkl') else entries[1]
        except (OSError, IndexError):
            # Not a directory / unreadable, or too few entries: skip it.
            continue
        files.append(os.path.join(dir_pickle, file_name))

    # --- recompute the ratios for every cached result ----------------------
    a_delta = []
    ratios_tfopt = []
    ratios_scipyfsolve = []
    for f in files:
        print(f)
        actions, _, _, ress_tf = load_vars(f)
        delta = ress_tf.delta
        kl2clip = KL2Clip(dim=dim)
        x0 = np.zeros(shape=(actions.shape[0], 2), dtype=np.float32)

        # Sort by action so stored and recomputed ratios are aligned.
        # presumably ``actions`` is (N, 1) -- verify against the pickles.
        inds = np.argsort(actions, axis=0).reshape(-1)
        actions = actions[inds]
        ratio_max_tfopt = ress_tf.ratio.max[inds]

        ress = kl2clip(mu0_logsigma0_cat=x0, a=actions, delta=delta)

        a_delta.append(
            np.concatenate((actions, delta * np.ones_like(actions)), axis=1))
        ratios_tfopt.append(ratio_max_tfopt)
        ratios_scipyfsolve.append(ress.ratio.max)

    # Round-trip through disk so the collected data is also left in aa.pkl.
    # NOTE(review): these are pickles produced by this project; do not point
    # this at untrusted files.
    save_vars('aa.pkl', a_delta, ratios_tfopt, ratios_scipyfsolve)
    a_delta, ratios_tfopt, ratios_scipyfsolve = load_vars('aa.pkl')

    # Thin the data before plotting, then flatten the per-file lists.
    a_delta = np.concatenate(_subsample(a_delta), axis=0)
    ratios_tfopt = np.concatenate(_subsample(ratios_tfopt), axis=0)
    ratios_scipyfsolve = np.concatenate(_subsample(ratios_scipyfsolve), axis=0)

    fig = plt.figure()
    # ``fig.gca(projection='3d')`` no longer accepts keyword arguments on
    # recent Matplotlib; add_subplot creates the same single 3-D axes.
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(0, 0)
    # NOTE(review): the fourth positional argument of Axes3D.scatter is
    # ``zdir``; '_tfopt' / '_scipyfsolve' look like they were meant as
    # labels. Kept as-is to preserve the existing plot -- confirm intent.
    ax.scatter(a_delta[:, 0], a_delta[:, 1], ratios_tfopt, '_tfopt',
               s=1, color='black')
    ax.scatter(a_delta[:, 0], a_delta[:, 1], ratios_scipyfsolve, '_scipyfsolve',
               s=1, color='red')
    plt_tools.set_postion()
    plt_tools.set_size()
    plt.show()