def pca_plot_general(headers, pcs, pcs2=None, index=(0, 1), title='', color=('b', 'r'),
                     print_name=False):
    """
    Scatter two selected components of `pcs` (and optionally `pcs2`) and save the figure.

    :param headers: sequence of names; the entries from position 2 onward are
        used to annotate the points, one annotation per entry
    :param pcs: indexable collection of component vectors; `pcs[index[0]]` is
        drawn on the x axis and `pcs[index[1]]` on the y axis
    :param pcs2: optional second collection, indexed the same way, drawn in
        `color[1]`; when given, a legend with entries 'train' (for `pcs`) and
        'test' (for `pcs2`) is added
    :param index: pair of component indices to plot
    :param title: suffix appended to the saved plot name ("pca_plot " + title)
    :param color: two matplotlib colors, for `pcs` and `pcs2` respectively
    :param print_name: when True the annotation is "<number>-<name>", otherwise
        only the number
    :return: None; the figure is written via `get_plot_path`
    """
    names = headers[2:]

    def _annotate(xs, ys):
        # NOTE(review): enumerate() restarts at 0 on headers[2:] while 2 is still
        # subtracted, so the printed numbers start at -2. Kept unchanged to
        # preserve existing figures -- confirm whether this offset is intended.
        for i, name in enumerate(names):
            text = '{}'.format(i - 2)
            if print_name:
                text = '{}-'.format(i - 2) + name
            ax.annotate(text, (xs[i], ys[i]))

    fig, ax = plt.subplots()

    pc1 = pcs[index[0]]
    pc2 = pcs[index[1]]
    sct = ax.scatter(pc1, pc2, color=color[0])
    _annotate(pc1, pc2)

    if pcs2 is not None:
        # No hold() call: the hold API was removed in matplotlib 3.0 and axes
        # always accumulate artists, so a second scatter simply overlays.
        pc1 = pcs2[index[0]]
        pc2 = pcs2[index[1]]
        sct2 = ax.scatter(pc1, pc2, color=color[1])
        _annotate(pc1, pc2)
        fig.legend((sct, sct2), ('train', 'test'))

    ax.set_xlabel("PC {}".format(index[0]))
    ax.set_ylabel("PC {}".format(index[1]))
    ax.set_title("PCA plot for data")
    fig.savefig(get_plot_path("pca_plot " + title))
def cross_validation_visualization(params, mse_tr, mse_te, params_name='', title='', error_name=''):
    """
    Draw the train and test error curves on a log-scaled x axis and save the plot.

    :param params: x values (the parameter values that were tried)
    :param mse_tr: train error for each entry of `params`
    :param mse_te: test error for each entry of `params`
    :param params_name: text appended to the x-axis label
    :param title: text appended to the plot title and to the saved plot name
    :param error_name: text appended to the y-axis label
    :return: None; the figure is written via `get_plot_path`
    """
    curves = (
        (mse_tr, 'b', 'train error'),
        (mse_te, 'r', 'test error'),
    )
    for errors, line_color, curve_label in curves:
        plt.semilogx(params, errors, marker=".", color=line_color, label=curve_label)

    plt.xlabel("Parameters: " + params_name)
    plt.ylabel("Error: " + error_name)
    plt.title("cross validation" + title)
    plt.legend(loc=2)
    plt.grid(True)

    plt.savefig(get_plot_path("cross_validation_" + title))
def pca_plot(headers, pc1, pc2, title=''):
    """
    Scatter `pc1` against `pc2`, annotate each point with a name, and save the plot.

    :param headers: sequence of names; entries from position 2 onward label the
        points in order
    :param pc1: x coordinates
    :param pc2: y coordinates
    :param title: suffix appended to the saved plot name ("pca_plot " + title)
    :return: None; the figure is written via `get_plot_path`
    """
    figure, axes = plt.subplots()
    axes.scatter(pc1, pc2)

    point_names = headers[2:]
    for position, point_name in enumerate(point_names):
        coords = (pc1[position], pc2[position])
        axes.annotate(point_name, coords)

    axes.set_xlabel("PC 1")
    axes.set_ylabel("PC 2")
    axes.set_title("PCA plot for data")

    target = get_plot_path("pca_plot " + title)
    figure.savefig(target)
def histogram(label, data, headers=None, colors=('b', 'r'), print_name=True, transform=None,
              filename='Default.plt', outlier=False):
    """
    Save, for every column of `data`, one figure with overlaid per-class
    histograms, one subplot per transform.

    Rows with `label < 0` and rows with `label > 0` are drawn as two overlaid
    histograms (100 bins each). Figures are written to
    `get_plot_path() + '/histogram/'` as "<filename>-<column index>_<header>.png";
    the directory is not created here.

    :param label: array of labels; its sign splits the rows into two groups
    :param data: 2-D array, one row per sample and one column per feature
    :param headers: sequence of names whose first two entries are skipped, so
        `headers[2:]` must have one name per column of `data`; when None the
        column indices are used as names
    :param colors: two matplotlib colors, for the negative and positive group
    :param print_name: include the column name in the x-axis label
    :param transform: list of callables applied to a column before binning;
        defaults to identity, shifted log, sqrt(|x|) and square
    :param filename: prefix of the saved file names
    :param outlier: when False, values equal to -999.0 are dropped from a
        column before it is transformed and binned
    :return: None
    """
    missing_value = -999.0  # entries with this value are skipped unless `outlier` is set
    hist_path = get_plot_path() + '/histogram/'

    # Split the rows by the sign of the label.
    negative_index = np.where(label < 0)[0]
    positive_index = np.where(label > 0)[0]
    nega_data = data[negative_index, :]
    posi_data = data[positive_index, :]

    if transform is None:
        transform = [lambda x: x,
                     lambda x: np.log(x + 0.01 - np.min(x)),
                     lambda x: np.sqrt(np.abs(x)),
                     lambda x: np.power(x, 2)]
        trans_labels = ['linear', 'log', 'sqrt|abs|', 'power']
    else:
        # Caller-supplied transforms need their own labels: use the callable's
        # name where it has a meaningful one, otherwise a positional label.
        trans_labels = []
        for position, func in enumerate(transform):
            func_name = getattr(func, '__name__', '')
            if not func_name or func_name == '<lambda>':
                func_name = 'transform{}'.format(position)
            trans_labels.append(func_name)

    n_columns = len(data[0])
    if headers is None:
        column_names = [str(position) for position in range(n_columns)]
    else:
        column_names = headers[2:]  # the first two header entries are skipped
    assert len(column_names) == n_columns, \
        "expected {} column names, got {}".format(n_columns, len(column_names))

    # Two subplots per row; at least two rows so the default four transforms
    # keep their original 2x2 layout, more rows when more transforms are given.
    n_rows = max(2, (len(transform) + 1) // 2)
    gs = gridspec.GridSpec(n_rows, 2)

    # (rows, color, alpha) for each group, drawn in this order on every subplot.
    groups = ((nega_data, colors[0], 0.5), (posi_data, colors[1], 0.8))

    for index, header in enumerate(column_names):
        for f_ind, f_trans in enumerate(transform):
            ax = plt.subplot(gs[f_ind // 2, f_ind % 2])
            ax.set_aspect('auto')
            # No hold() calls: the hold API was removed in matplotlib 3.0 and
            # successive hist() calls on the same axes always overlay.
            for rows, face_color, opacity in groups:
                values = rows[:, index]
                if not outlier:
                    values = values[values != missing_value]
                ax.hist(f_trans(values).T, bins=100, color=face_color, alpha=opacity)
            if print_name:
                ax.set_xlabel("{}({})".format(trans_labels[f_ind], header))
            else:
                ax.set_xlabel(trans_labels[f_ind])
        plt.savefig(hist_path + "{}-{}_{}.png".format(filename, index, header))
        plt.close()
def plot_train_test(train_errors, test_errors, names=['', ''], xlabel='', ylabel='', lambdas=None, filename=''):
    """
    Plot train and test errors against `lambdas` on a log-scaled x axis and save the plot.

    :param train_errors: train error for each entry of `lambdas`
    :param test_errors: test error for each entry of `lambdas`
    :param names: legend labels; names[0] for the train curve, names[1] for the test curve
    :param xlabel: x-axis label
    :param ylabel: y-axis label
    :param lambdas: x values, same length as the two error sequences
    :param filename: used as the plot title and as the suffix of the saved
        plot name ("train_test " + filename)
    :return: None; the figure is written via `get_plot_path`
    """
    curves = ((train_errors, 'b'), (test_errors, 'r'))
    for position, (errors, shade) in enumerate(curves):
        plt.semilogx(lambdas, errors, color=shade, marker='*', label=names[position])

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(filename)

    # Shadowed legend in the upper-right corner, drawn without its frame.
    plt.legend(loc=1, shadow=True).draw_frame(False)

    plt.savefig(get_plot_path("train_test " + filename))
def bias_variance_decomposition_visualization(models, rmse_tr, rmse_te, model_names=None):
    """
    Plot every train/test error curve as a thin pale line, plus the mean of
    each as a thick line, and save the plot as "bias_variance".

    :param models: sequence whose length gives the number of x positions
        (0 .. len(models) - 1); its contents are not used
    :param rmse_tr: 2-D array-like of train errors; the mean is taken over
        axis 0, so axis 1 must have len(models) entries
    :param rmse_te: 2-D array-like of test errors, same layout as `rmse_tr`
    :param model_names: currently unused
    :return: None; the figure is written via `get_plot_path`
    """
    # Accept nested lists as well as arrays: `.T` below needs an ndarray.
    rmse_tr = np.asarray(rmse_tr)
    rmse_te = np.asarray(rmse_te)

    rmse_tr_mean = np.expand_dims(np.mean(rmse_tr, axis=0), axis=0)
    rmse_te_mean = np.expand_dims(np.mean(rmse_te, axis=0), axis=0)
    degrees = np.arange(len(models))

    # Color is given only through the keyword. Passing it in the fmt string as
    # well makes matplotlib warn that it is redundantly defined (the keyword
    # wins in that case, so the rendered colors are the same as before).
    plt.plot(degrees, rmse_tr.T, linestyle="-", color=[0.7, 0.7, 1],
             label='train', linewidth=0.3)
    plt.plot(degrees, rmse_te.T, linestyle="-", color=[1, 0.7, 0.7],
             label='test', linewidth=0.3)
    plt.plot(degrees, rmse_tr_mean.T, linestyle="-", color='b',
             label='train', linewidth=3)
    plt.plot(degrees, rmse_te_mean.T, linestyle="-", color='r',
             label='test', linewidth=3)

    plt.xlabel("degree")
    plt.ylabel("error")
    plt.title("Bias-Variance Decomposition")
    plt.savefig(get_plot_path("bias_variance"))
import numpy as np


#%%
def synthetic_lst():
    """Return three Buff objects, one for each synth_N.pkl file under c.env_path."""
    pickle_names = ('synth_1.pkl', 'synth_2.pkl', 'synth_3.pkl')
    return [
        Buff(c.update_freq, c.batch_size, c.env_path + pickle_name)
        for pickle_name in pickle_names
    ]


#%%
if __name__ == '__main__':
    # Main buffer, trimmed with cut_old() before training starts.
    buff = Buff(c.update_freq, c.batch_size, c.buffer_path,
                buffer_size=200000, dev_size=0.01)
    buff.cut_old()

    lst_buff = synthetic_lst()
    mod = Model(buff.buffer_cols, c.action_nums, c.params,
                c.mod_path, c.mod_records_path, True)

    for experience in buff.samples(12000):
        mod.train_on_sample(experience, buff.dev_sample)

        # About one step in four, also train on a sample drawn from a
        # randomly chosen synthetic buffer.
        if random.random() < 0.25:
            tbuff = np.random.choice(lst_buff)
            mod.train_on_sample(tbuff.one_sample(), tbuff.dev_sample)

    mod.plot(helpers.get_plot_path() + 'plot.pdf')
    mod.clean_up(True)
buff = Buff(update_freq, batch_size, c.buffer_path) #%% buff.cut_old() print(buff.buffer_df.shape) #%% lst_buff = synthetic_lst() #%% grid_save = [] save_freq = update_freq * 10 for num, param in enumerate(ParameterGrid(params)): generator = buff.samples(16000) mod = Model(buff.buffer_cols, c.action_nums, param, c.mod_path, c.mod_records_path, True) print(param) for experience in generator: #s0, actions, rewards, s1, done mod.train_on_sample(experience, buff.dev_sample) if random.random() < 0.25: tbuff = np.random.choice(lst_buff) mod.train_on_sample(tbuff.one_sample(), tbuff.dev_sample, toplot=False) mod.clean_up(save_mod=False) #save_mod=True mod.plot(helpers.get_plot_path() + str(num) + '.pdf') grid_save.append(mod) # break #%%