def _elo_rank(elo, labels):
    """Label every rating in ``elo`` with its rating band.

    ``labels`` is ordered from the strongest band to the weakest and is matched
    against descending lower bounds. ``np.select`` keeps the first condition
    that holds, so each rating receives the highest band whose bound it
    reaches. Ratings that reach no bound (negative or missing) get ``'0'``.
    """
    lower_bounds = (2700, 2500, 2400, 2300, 2200, 2000, 1800, 1600, 1400, 1200, 0)
    conditions = [elo >= bound for bound in lower_bounds]
    # The default must be a string: recent NumPy refuses to mix string choices
    # with the implicit integer default. '0' is what older NumPy produced.
    return np.select(conditions, labels, default='0')


def _show_panels(frame, title, columns, draw, wspace=None):
    """Draw ``frame[column]`` for each column in one row of subplots, then show it."""
    for position, column in enumerate(columns, start=1):
        plt.subplot(1, len(columns), position)
        plt.title(title)
        plt.xlabel(column)
        # wspace=None leaves the current horizontal spacing untouched.
        plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1, wspace=wspace)
        draw(frame[column])
        plt.xticks(rotation=30)
    plt.show()
    plt.clf()


def chess_analysis():
    """Download the Kaggle chess-games dataset, enrich it and plot it per game type.

    The function authenticates against Kaggle, downloads
    ``arevel/chess-games``, loads ``chess_games.csv`` from the downloaded zip,
    drops duplicate player pairings and games whose notation contains engine
    evaluations, derives rating-band / outcome / opening-move columns and then
    shows three figures for each game type. It prints the first 1000 rows and
    the total run time, and returns nothing.
    """
    # Start time count to gauge process run time
    start = time.time()

    api = KaggleApi()
    api.authenticate()

    # downloading datasets for Chess games
    api.dataset_download_files('arevel/chess-games')

    # Read the CSV in chunks of 100000 rows and concatenate them; the archive
    # and the member are closed as soon as the frame has been built.
    with zipfile.ZipFile('chess-games.zip') as archive:
        with archive.open('chess_games.csv') as handle:
            chess_df = pd.concat(pd.read_csv(handle, chunksize=100000))

    # Keep only the first game of every (White, Black) pairing
    chess_df = chess_df.drop_duplicates(subset=['White', 'Black'])

    # remove any rows with stockfish evaluation as this clogs up the data at a later stage
    chess_df = chess_df.drop(chess_df[chess_df.AN.str.contains(r'[{}]')].index)

    # use iterrows to print out data
    for index, row in chess_df.head(1000).iterrows():
        print(index, row)

    # reset index after dropping duplicate users and removing stockfish evaluations
    chess_df = chess_df.reset_index()

    # Define average elo rank per game
    chess_df['AverageElo'] = (chess_df['WhiteElo'] + chess_df['BlackElo']) / 2

    # Labels for the rating bands (strongest first) and for the game results
    elo = [
        'Super GM', 'GM', 'GM/IM', 'FM/IM', 'CM/NM', 'Experts',
        'Class A', 'Class B', 'Class C', 'Class D', 'Novices',
    ]
    outcome = ['White Wins', 'Black Wins', 'Draw', 'No Result']
    outcome_conditions = [
        chess_df['Result'] == "1-0",
        chess_df['Result'] == "0-1",
        chess_df['Result'] == "1/2-1/2",
        chess_df['Result'] == "*",
    ]

    # Turn the numeric ratings and the raw result into grouped categories.
    # All three rating columns share the same inclusive bounds, so a player
    # rated exactly 2700 is a 'Super GM' whichever colour they play.
    chess_df['WhiteEloRank'] = _elo_rank(chess_df['WhiteElo'], elo)
    chess_df['BlackEloRank'] = _elo_rank(chess_df['BlackElo'], elo)
    chess_df['AverageEloRank'] = _elo_rank(chess_df['AverageElo'], elo)
    chess_df['Outcome'] = np.select(outcome_conditions, outcome, default='0')

    # Split the notation into at most 31 space-separated pieces and drop every
    # third one (positions 0, 3, 6, ...), keeping the two pieces in between.
    moves_df = chess_df["AN"].str.split(" ", n=30, expand=True)
    moves_df = moves_df.drop(columns=moves_df.columns[0:31:3])

    # append moves dataframe to chess dataframe
    chess_df = pd.concat([chess_df, moves_df], axis=1)

    # sort data from highest average elo to lowest average elo
    chess_df = chess_df.sort_values(by='AverageElo', ascending=False)

    # make sure the rating columns are numeric
    rating_columns = ["WhiteElo", "BlackElo", "AverageElo"]
    chess_df[rating_columns] = chess_df[rating_columns].apply(pd.to_numeric)

    # Event names appear with inconsistent surrounding spaces (' Blitz ',
    # 'Blitz '), so compare the stripped name instead of merging one filtered
    # frame per spelling.
    event = chess_df['Event'].str.strip()
    game_types = [
        ('Classical', 'Classical'),
        ('Classical Tournament', 'Classical tournament'),
        ('Blitz', 'Blitz'),
        ('Blitz Tournament', 'Blitz tournament'),
        ('Bullet', 'Bullet'),
        ('Bullet Tournament', 'Bullet tournament'),
        ('Correspondence', 'Correspondence'),
    ]

    def count_bars(values):
        sns.countplot(x=values)

    def rating_histogram(values):
        sns.histplot(x=values, kde=True, bins=25)

    # Plot results: one figure per column group and game type
    for title, event_name in game_types:
        games = chess_df[event == event_name]
        # Categorical data
        _show_panels(games, title, ['Termination', 'Outcome', 'AverageEloRank'],
                     count_bars, wspace=0.45)
        # Rating distribution
        _show_panels(games, title, ['AverageElo'], rating_histogram)
        # The two move columns kept right after the first dropped piece
        _show_panels(games, title, [1, 2], count_bars)

    end = time.time()
    print("Run Time: ", (end - start), 'Seconds')
def data_analisys(df):
    """Show a histogram and a box plot of the ``energy_100g`` column side by side.

    Parameters
    ----------
    df : mapping of column name to data
        Must provide an ``'energy_100g'`` entry.
    """
    # One wide figure: histogram on the left, box plot on the right.
    _, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(20, 4))
    energy = df['energy_100g']
    sn.histplot(data=energy, ax=hist_ax)
    sn.boxplot(data=energy, ax=box_ax)
    plt.show()
def histogram_weighted_enablement_series(enablement_series, weights):
    """Print the weight-expanded enablement values and show their histogram.

    The series is expanded with ``expand_results_by_weights``, printed, and
    every expanded value is converted to ``int`` before being plotted.
    """
    expanded = expand_results_by_weights(enablement_series, weights)
    print(expanded)

    degrees = [int(value) for value in expanded]
    histogram_ax = sns.histplot(data=degrees)
    histogram_ax.set(xlabel='degree of enablement', ylabel='number of TWers')
    plt.show()
} with open(os.path.join(root_mod, 'metrics_ceil_general.json'), 'w') as f: json.dump(metrics, f) log.info(f'Metrics of the general model (CEIL): {metrics}.') # plot the residuals of the general model for l in labs: fig, ax = plt.subplots(1, 1, figsize=(6, 5)) sns.histplot(residuals['training'][l], binwidth=1, alpha=0.35, color='tab:blue', label='training', log_scale=(False, True), ax=ax) sns.histplot(residuals['validation'][l], binwidth=1, alpha=0.35, color='tab:red', label='validation', log_scale=(False, True), ax=ax) sns.histplot(residuals['test'][l], binwidth=1, alpha=0.35,
def plot_single_neuron(data, name: str) -> sns.JointGrid:
    """Draw the distance-vs-collision joint plot of one neuron and save it.

    The joint axes hold two overlaid hexbin plots (axon in green, dendrite in
    orange) of branch length against normalized collision chance; the marginal
    axes hold the matching histograms. The figure is written to
    ``results/for_article/fig_coll_agg/coll_vs_dist_single_neuron.pdf`` and
    shown without blocking.

    Parameters
    ----------
    data : CollisionsDistNaive
        Object exposing ``parsed_dend`` and ``parsed_axon`` tables with
        ``dist``, ``coll``, ``coll_normed`` and ``neurite`` columns.
    name : str
        The neuron's name (not used by the body).

    Returns
    -------
    sns.JointGrid
    """
    length_col = "Length of branch [um]"
    chance_col = "Normalized chance for collision"

    branches = pd.concat([data.parsed_dend, data.parsed_axon], ignore_index=True)
    branches = branches.rename(
        columns={
            "dist": length_col,
            "coll": "Chance for collision",
            "coll_normed": chance_col,
        }
    )

    grid = sns.JointGrid(height=8)

    axon = branches.query('neurite == "axon"')
    dendrite = branches.query('neurite == "dendrite"')
    axon_length, axon_chance = axon[length_col], axon[chance_col]
    dend_length, dend_chance = dendrite[length_col], dendrite[chance_col]

    # Both hexbin layers share the extent of the axon data.
    shared_hexbin = dict(
        gridsize=30,
        alpha=0.7,
        edgecolors=None,
        mincnt=1,
        extent=(axon_length.min(), axon_length.max(), 0, axon_chance.max()),
    )
    grid.ax_joint.hexbin(x=axon_length, y=axon_chance, cmap="Greens", **shared_hexbin)
    grid.ax_joint.hexbin(x=dend_length, y=dend_chance, cmap="Oranges", **shared_hexbin)

    # Marginal distributions, axon first and dendrite on top.
    sns.histplot(x=axon_length, alpha=0.5, ax=grid.ax_marg_x, color="C2")
    sns.histplot(x=dend_length, alpha=0.5, ax=grid.ax_marg_x, color="C1")
    sns.histplot(y=axon_chance, alpha=0.5, ax=grid.ax_marg_y, color="C2", bins=20)
    sns.histplot(y=dend_chance, alpha=0.5, ax=grid.ax_marg_y, color="C1", bins=5)

    grid.ax_joint.set_xlabel(length_col)
    grid.ax_joint.set_ylabel(chance_col)
    plt.tight_layout()
    sns.despine(trim=True, ax=grid.ax_joint)

    figure = grid.ax_joint.figure
    figure.savefig(
        "results/for_article/fig_coll_agg/coll_vs_dist_single_neuron.pdf",
        transparent=True,
        dpi=300,
    )
    plt.show(block=False)
    return grid
def solve_model(self):
    """
    Runs the entire model.

    Steps, in order:

    a. solve the household problem with ``solve_hh``;
    b. compute the stationary distribution with the method named by
       ``self.distribution_method`` ('discrete', 'eigenvector' or
       'monte carlo'), and run the Markov-chain simulation when
       ``self.simulate == 1`` or the method is 'monte carlo';
    c. optionally evaluate the Euler equation error on the full grid;
    d. optionally plot policy functions, densities and simulation paths;
    e. print the steady-state assets and the error statistics.

    Results are stored on ``self``; nothing is returned.

    Raises
    ------
    RuntimeError
        If the policy function or the discrete density iteration reports an
        iteration count of ``self.maxit - 1`` or more.
    """
    t0 = time.time()  # start the clock

    # a. solve household problem
    print("\nSolving household problem...")

    self.pol_sav, self.pol_cons, self.it_hh = solve_hh(self.params_pfi)

    if self.it_hh < self.maxit-1:
        print(f"Policy function convergence in {self.it_hh} iterations.")
    else:
        raise RuntimeError("No policy function convergence.")

    t1 = time.time()
    print(f'Household problem time elapsed: {t1-t0:.2f} seconds')

    # b. stationary distribution

    # discrete approximation
    if self.distribution_method == 'discrete':
        print("\nStationary Distribution Solution Method: Discrete Approximation and Forward Iteration on Density Function")
        print("\nComputing...")

        # i. approximate stationary density
        self.stationary_pdf, self.it_pdf = discrete_stationary_density(self.pol_sav, self.params_discrete)

        if self.it_pdf < self.maxit-1:
            print(f"Convergence in {self.it_pdf} iterations.")
        else:
            raise RuntimeError("No density function convergence.")

        # ii. steady state assets
        self.a_ss = np.sum(np.dot(self.stationary_pdf, self.grid_a_fine))

        # iii. marginal wealth density
        self.stationary_wealth_pdf = np.sum(self.stationary_pdf, axis=0)

        t2 = time.time()
        print(f'Density approximation time elapsed: {t2-t1:.2f} seconds')

    # eigenvector
    if self.distribution_method == 'eigenvector':
        print("\nStationary Distribution Solution Method: Eigenvector Method for Exact Stationary Density")
        print("\nComputing...")

        self.stationary_pdf, self.Q = self.eigen_stationary_density()

        # i. aggregate asset holdings
        self.a_ss = np.sum(np.dot(self.stationary_pdf, self.grid_a_fine))

        # ii. marginal wealth density
        self.stationary_wealth_pdf = np.sum(self.stationary_pdf, axis=0)

        t2 = time.time()
        print(f'Density computation time elapsed: {t2-t1:.2f} seconds')

    # monte carlo simulation
    # One flag decides whether simulation output exists, so the plotting and
    # printing steps below can never ask for results that were not computed.
    run_simulation = self.simulate == 1 or self.distribution_method == 'monte carlo'

    if run_simulation:
        if self.distribution_method == 'monte carlo':
            print("\nStationary Distribution Solution Method: Monte Carlo Simulation")

        print("\nSimulating...")
        # Time the simulation on its own; a density computed above must not
        # be counted as simulation time.
        t_sim = time.time()

        # i. simulate markov chain and endog. variables
        self.sim_c, self.sim_sav, self.sim_z, self.sim_m, self.euler_error_sim = simulate_MarkovChain(
            self.pol_cons,
            self.pol_sav,
            self.params_sim
        )

        # ii. steady state assets
        if self.distribution_method == 'monte carlo':
            self.a_ss = np.mean(self.sim_sav[self.sim_burnin:])

        # iii. max and average euler error, ignores nan which is when the euler equation does not bind
        self.max_error_sim = np.nanmax(self.euler_error_sim)
        self.avg_error_sim = np.nanmean(np.nanmean(self.euler_error_sim, axis=1))

        t2 = time.time()
        print(f'Simulation time elapsed: {t2-t_sim:.2f} seconds')

    else:
        t2 = time.time()

    # c. calculate euler equation error across the state space
    if self.full_euler_error:
        print("\nCalculating Euler Equation Error...")

        self.euler_error, self.max_error, self.avg_error = self.ee_error()

        t3 = time.time()
        print(f'Euler Eq. error calculation time elapsed: {t3-t2:.2f} seconds')

    else:
        t3 = time.time()

    # d. plot
    if self.plott:
        print('\nPlotting...')

        ##### Solutions #####
        plt.plot(self.grid_a, self.pol_sav.T)
        plt.title("Savings Policy Function")
        plt.plot([self.a_bar, self.a_max], [self.a_bar, self.a_max], linestyle=':')
        plt.legend(['z='+str(self.grid_z[0]), 'z='+str(self.grid_z[1]), '45 degree line'])
        plt.xlabel('Assets')
        #plt.savefig('savings_policyfunction_pfi_v1.pdf')
        plt.show()

        plt.plot(self.grid_a, self.pol_cons.T)
        plt.title("Consumption Policy Function")
        plt.legend(['z='+str(self.grid_z[0]), 'z='+str(self.grid_z[1])])
        plt.xlabel('Assets')
        #plt.savefig('consumption_policyfunction_pfi_v1.pdf')
        plt.show()

        if self.full_euler_error:
            plt.plot(self.grid_a_fine, self.euler_error.T)
            plt.title('Log10 Euler Equation Error')
            plt.xlabel('Assets')
            #plt.savefig('log10_euler_error_pfi_v1.pdf')
            plt.show()

        ##### Distributions ####
        if self.distribution_method in ('discrete', 'eigenvector'):
            if self.distribution_method == 'discrete':
                method_label = "Discrete Approx."
            else:
                method_label = "Eigenvector Method"

            # joint stationary density
            plt.plot(self.grid_a_fine, self.stationary_pdf.T)
            plt.title(f"Joint Stationary Density ({method_label})")
            plt.xlabel('Assets')
            plt.legend(['z='+str(self.grid_z[0]), 'z='+str(self.grid_z[1])])
            #plt.savefig('joint_density_pfi_v1_discrete.pdf') if self.distribution_method == 'discrete' else plt.savefig('joint_density_pfi_v1_eigenvector.pdf')
            plt.show()

            # marginal wealth density
            plt.plot(self.grid_a_fine, self.stationary_wealth_pdf)
            plt.title(f"Stationary Wealth Density ({method_label})")
            plt.xlabel('Assets')
            #plt.savefig('wealth_density_pfi_v1_discrete.pdf') if self.distribution_method == 'discrete' else plt.savefig('wealth_density_pfi_v1_eigenvector.pdf')
            plt.show()

        if self.distribution_method == 'monte carlo':
            # cross-section of savings in the last simulated period
            sns.histplot(self.sim_sav[-1, :], bins=100, stat='density')
            plt.title("Stationary Wealth Density (Monte Carlo Approx.)")
            plt.xlabel('Assets')
            # plt.savefig('wealth_density_pfi_v1_montecarlo.pdf')
            plt.show()

        ##### Simulation #####
        if run_simulation:
            fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6))
            fig.tight_layout(pad=4)

            # first household (column 0) over the first 100 periods, or over
            # the whole simulation when it is shorter than that
            n_periods = min(100, self.sim_sav.shape[0])
            periods = np.arange(n_periods)
            ax1.plot(periods, self.sim_sav[:n_periods, 0],
                     periods, self.sim_c[:n_periods, 0],
                     periods, self.sim_z[:n_periods, 0], '--')
            ax1.legend(['Savings', 'Consumption', 'Income'])
            ax1.set_title('Simulation of First Household During First 100 Periods')

            # averages over entire simulation; only savings and consumption
            # are drawn here, so only those two get a legend entry
            all_periods = np.arange(0, self.simT, 1)
            ax2.plot(all_periods, np.mean(self.sim_sav, axis=1),
                     all_periods, np.mean(self.sim_c, axis=1))
            ax2.legend(['Savings', 'Consumption'])
            ax2.set_title(f'Simulation Average over {self.sim_sav.shape[1]:,} Households')
            #plt.savefig('simulation_pfi_v1.pdf')
            plt.show()

        t4 = time.time()
        print(f'Plot time elapsed: {t4-t3:.2f} seconds')

    # e. print solution
    if self.distribution_method != 'none':
        print("\n-----------------------------------------")
        print("Stationary Equilibrium Solution")
        print("-----------------------------------------")
        print(f"Steady State Assets = {self.a_ss:.2f}")

    if run_simulation or self.full_euler_error:
        print("\n-----------------------------------------")
        print("Log10 Euler Equation Error Evaluation")
        print("-----------------------------------------")

        if self.full_euler_error:
            print(f"\nFull Grid Evalulation: Max Error = {self.max_error:.2f}")
            print(f"Full Grid Evalulation: Average Error = {self.avg_error:.2f}")

        if run_simulation:
            print(f"\nSimulation: Max Error = {self.max_error_sim:.2f}")
            print(f"Simulation: Average Error = {self.avg_error_sim:.2f}")

    t5 = time.time()
    print(f'\nTotal Run Time: {t5-t0:.2f} seconds')
import Conexao as con
import pandas as pd
import seaborn as sns

# Load the COVID impact table, then release the connection before plotting.
conexao = con.Conexao()
df = conexao.getCovidImpactDataFrame()
conexao.fecharConexao()

# Distribution of PercentOfBaseline.
hist_ax = sns.histplot(df['PercentOfBaseline'])
hist_ax.set_title('Distribuição de PercentOfBaseline')

# PercentOfBaseline by country.
country_ax = sns.scatterplot(x="Country", y="PercentOfBaseline", data=df)
country_ax.set_title("Relação entre países e PercentOfBaseline")

sns.set_style("dark")

# PercentOfBaseline by city, with slanted tick labels.
g = sns.scatterplot(x="City", y="PercentOfBaseline", data=df)
g.set_xticklabels(g.get_xticklabels(), rotation=20)
g.set_title("Relação entre cidades e PercentOfBaseline")

# Number of rows per date.
aux_df = df["Date"].value_counts().to_frame()
aux_df.columns = ["QtdVoos"]
sns.scatterplot(x=aux_df.index, y="QtdVoos", data=aux_df)

# PercentOfBaseline over time.
sns.scatterplot(x="Date", y="PercentOfBaseline", data=df)
def main_fig(network_type_list, space_list, N_list, d_list, seed_list, weight_list, dynamics):
    """Assemble the multi-panel figure comparing full networks with their reductions.

    A 13x20 inch figure with an 8x9 grid is built on the current matplotlib
    state. Row 0 and row 4 are text-only header rows holding the governing
    equations. For the i-th network (the lists are iterated in lockstep) row
    ``i + 1`` shows the network topology, its degree histogram, the full
    dynamics and the one-group (``m = 1``) reduction, and row ``4 + i + 1``
    shows the reductions for ``m`` in 3, 5 and 10.

    ``colors`` holds three palettes indexed by ``i``, so at most three
    networks can be drawn. The function reads the module-level names
    ``labelsize`` and ``legendsize``; it neither saves nor returns the figure.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm it against the original file.

    :network_type_list: network type names; 'ER' and 'SF' get special styling
    :space_list: per-network ``space`` argument forwarded to the helpers
    :N_list: per-network ``N`` passed to ``network_generate`` and ``read_xs``
    :d_list: per-network ``d`` passed to ``network_generate`` and ``read_xs``
    :seed_list: per-network seed passed to ``network_generate`` and ``read_xs``
    :weight_list: weights forwarded to ``read_xs`` and the plotting helpers
    :dynamics: not used by the body
    :returns: None
    """
    colors = ['Reds', 'Blues', 'Greens']
    letters = list('abcdefghijklmnopqrstuvwxyz')
    fig = plt.figure(figsize=(13, 20))
    gs = mpl.gridspec.GridSpec(
        nrows=8,
        ncols=9,
        height_ratios=[0.9, 1, 1, 1, 1.15, 1, 1, 1],
        width_ratios=[1.05, 0.25, 0.75, 0.10, 0.75, 0.25, 0.75, 0.85, 1])

    # Header row: full N-dimensional equation and its one-dimensional reduction.
    #plt.rcParams.update({"text.usetex": True,})
    ax = fig.add_subplot(gs[0, :])
    ax.set_axis_off()
    dxdt = r'$\frac{dx_i}{dt} = F(x_i) + w \sum_{j=1}^{N} A_{ij} G(x_i, x_j)$'
    t = ax.text(0.3, 0.7, dxdt, ha="center", va="center", rotation=0, size=15, bbox=dict(boxstyle="round,pad=0.3", fc="tab:grey", ec="k", lw=1, alpha=0.5))
    dxdt = r'$\frac{dx}{dt} = F(x) + w \beta G(x, x)$'
    t = ax.text(0.91, 0.7, dxdt, ha="center", va="center", rotation=0, size=15, bbox=dict(boxstyle="round,pad=0.3", fc="tab:grey", ec="k", lw=1, alpha=0.5))
    ax.annotate('One-dimensional Reduction', xy=(1.2, 0.8), xytext=(0.55, 0.7), xycoords='axes fraction', fontsize=14, color='tab:grey', weight='bold')
    #plt.rcParams.update({"text.usetex": False,})

    # One pass per network; i selects the grid row and the colour palette.
    for (i, network_type), N, d, seed, space in zip(enumerate(network_type_list), N_list, d_list, seed_list, space_list):
        if network_type == 'ER':
            color_bias = 0
            node_size_bias = 1
        else:
            color_bias = 0.1
            node_size_bias = 5
        A_unit, A_interaction, index_i, index_j, cum_index = network_generate(network_type, N, 1, 0, seed, d)
        G = nx.from_numpy_array(A_unit)
        feature = feature_from_network_topology(A_unit, G, space, tradeoff_para=0.5, method='degree')
        # Axis labels are only written for i == 2 (x) and i == 1 (y).
        if i == 2:
            xlabel = '$w$'
            xk_label = '$k$'
        else:
            xlabel = ''
            xk_label = ''
        if i == 1:
            ylabel = '$y^{(\\mathrm{gl})}_s$'
            ylabel_xs = '$x_s$'
            yk_label = '$P(k)$'
        else:
            ylabel_xs = ''
            ylabel = ''
            yk_label = ''

        # Columns 0-1: topology of the full network.
        ax = fig.add_subplot(gs[i + 1, 0:2])
        ax.annotate('$A_{ij}$', xy=(-0.7, 0.5), xytext=(0.9, 0.9), xycoords='axes fraction', fontsize=15, color='k', alpha=.8, weight='bold')
        # The first assignment is immediately overwritten; only the second
        # label form (column letter followed by row number) is used.
        title_letter = f'({letters[i+0]}1)'
        title_letter = f'({letters[0]}{i+1})'
        plot_network_topology(ax, network_type, N, A_unit, colors[i], color_bias, node_size_bias, title_letter)
        ax.annotate(network_type, xy=(-0.7, 0.5), xytext=(-0.85 - 0.08 * len(network_type), 0.45), xycoords='axes fraction', fontsize=15, color=sns.color_palette(colors[i])[-1], alpha=.5, weight='bold')
        if i == 0:
            ax.annotate('Topology', xy=(-0.7, 0.5), xytext=(0.25, 1.4), xycoords='axes fraction', fontsize=15, color='tab:grey', alpha=.5, weight='bold')
        xs_multi = read_xs(network_type, N, space, d, seed, N, weight_list)

        # Column 2: histogram of the number of positive entries per column of
        # A_unit (log-scaled y axis for 'SF' networks).
        ax = fig.add_subplot(gs[i + 1, 2])
        title_letter = f'({letters[i+0]}2)'
        title_letter = f'({letters[1]}{i+1})'
        ax.annotate(title_letter, xy=(-0.2, 1.03), xycoords="axes fraction", size=labelsize * 0.8)
        simpleaxis(ax)
        k = np.sum(A_unit > 0, 0)
        if network_type == 'SF':
            sns.histplot(k, bins=20, stat='density', ax=ax, color=sns.color_palette(colors[i])[-1], alpha=0.5)
            ax.set_yscale('log')
        else:
            sns.histplot(k, bins=20, stat='density', ax=ax, color=sns.color_palette(colors[i])[-1], alpha=0.5)
        ax.set_xlabel(xk_label, fontsize=labelsize)
        ax.set_ylabel(yk_label, fontsize=labelsize)

        # Columns 4-5: states of the full network against the weight.
        ax = fig.add_subplot(gs[i + 1, 4:6])
        title_letter = f'({letters[i+0]}3)'
        title_letter = f'({letters[2]}{i+1})'
        linewidth = 2
        alpha = 0.8
        plot_xs_weight(ax, xlabel, ylabel_xs, A_unit, len(A_unit), weight_list, xs_multi, colors[i], linewidth, alpha, color_bias, title_letter)
        if i == 0:
            ax.annotate('Dynamics', xy=(-0.7, 0.5), xytext=(0.25, 1.4), xycoords='axes fraction', fontsize=15, color='tab:grey', alpha=.5, weight='bold')

        # Column 6: an arrow-shaped box separating the full and reduced panels.
        ax = fig.add_subplot(gs[i + 1, 6])
        ax.set_axis_off()
        ax.annotate(' ' * 10, xy=(0.4, 0.50), xytext=(0.75, 0.5), xycoords='axes fraction', ha='center', va='bottom', bbox=dict(boxstyle='rarrow, pad=0.6', fc=sns.color_palette(colors[i])[-1], ec='k', lw=2, alpha=0.5))

        # Column 7: topology of the single-group (m = 1) reduction.
        ax = fig.add_subplot(gs[i + 1, 7])
        title_letter = f'({letters[i+0]}4)'
        title_letter = f'({letters[3]}{i+1})'
        m = 1
        xs_group = read_xs(network_type, N, space, d, seed, m, weight_list)
        group_index = group_index_from_feature_Kmeans(feature, m)
        A_reduced, _, _ = reducednet_effstate(A_unit, xs_multi[0], group_index)
        ax.annotate('$\\beta$', xy=(-0.7, 0.5), xytext=(1, 1), xycoords='axes fraction', fontsize=15, color='k', alpha=.8, weight='bold')
        if i == 0:
            ax.annotate('Topology', xy=(-0.7, 0.5), xytext=(0.25, 1.4), xycoords='axes fraction', fontsize=15, color='tab:grey', alpha=.5, weight='bold')
        plot_reduced_network_topology(ax, network_type, A_reduced, m, colors[i], color_bias, node_size_bias, title_letter)

        # Column 8: dynamics of the m = 1 reduction; the full-network data is
        # passed along as well.
        ax = fig.add_subplot(gs[i + 1, 8])
        title_letter = f'({letters[i+0]}5)'
        title_letter = f'({letters[4]}{i+1})'
        if i == 0:
            ax.annotate('Dynamics', xy=(-0.7, 0.5), xytext=(0.25, 1.4), xycoords='axes fraction', fontsize=15, color='tab:grey', alpha=.5, weight='bold')
        plot_xs_weight(ax, xlabel, ylabel, A_reduced, m, weight_list, xs_group, colors[i], linewidth, alpha, color_bias, title_letter, xs_multi, A_unit, group_index)
        if i == 2:
            # Empty proxy lines so one legend can show the grey ground truth
            # next to the three palette colours used for the reductions.
            groundtruth, = ax.plot([], [], color='tab:grey', alpha=0.8, label='ground truth', linewidth=2)
            reduced1, = ax.plot([], [], color=sns.color_palette(colors[0])[-1], linewidth=3)
            reduced2, = ax.plot([], [], color=sns.color_palette(colors[1])[-1], linewidth=3)
            reduced3, = ax.plot([], [], color=sns.color_palette(colors[2])[-1], linewidth=3)
            ax.legend([groundtruth, (reduced1, reduced2, reduced3)], ['ground truth', 'reduced'], fontsize=legendsize * 0.7, frameon=False, loc=4, bbox_to_anchor=(1.05, -1.8), handler_map={tuple: HandlerTuple(ndivide=None)})

        # Lower half: one topology panel and one dynamics panel per group count.
        for j, m in enumerate([3, 5, 10]):
            if i == 2:
                xlabel = '$w$'
            else:
                xlabel = ''
            if i == 1:
                ylabel = '$y^{(\\mathrm{gl})}_s$'
            else:
                ylabel = ''
            if j == 0:
                ax = fig.add_subplot(gs[4 + i + 1, 0])
            elif j == 1:
                ax = fig.add_subplot(gs[4 + i + 1, 3:5])
            elif j == 2:
                ax = fig.add_subplot(gs[4 + i + 1, 7])
            title_letter = f'({letters[i+0]}{6+j*2})'
            title_letter = f'({letters[5+j*2]}{i+1})'
            if i == 0:
                ax.annotate(f'$m={m}$', xy=(0.7, 0.5), xytext=(0.95, 1.25), xycoords='axes fraction', fontsize=15, color='tab:grey', weight='bold')
            # NOTE(review): placed outside the `if i == 0` above, by analogy
            # with the unconditional '$A_{ij}$' and '$\beta$' annotations —
            # confirm against the original indentation.
            ax.annotate('$\\beta_{ab}$', xy=(-0.7, 0.5), xytext=(0.85, 0.85), xycoords='axes fraction', fontsize=15, color='k', alpha=.8, weight='bold')
            xs_group = read_xs(network_type, N, space, d, seed, m, weight_list)
            group_index = group_index_from_feature_Kmeans(feature, m)
            # A random state vector is passed here, whereas the m = 1 case
            # above passes xs_multi[0].
            A_reduced, _, _ = reducednet_effstate(
                A_unit, np.random.random(len(A_unit)), group_index)
            plot_reduced_network_topology(ax, network_type, A_reduced, m, colors[i], color_bias, node_size_bias, title_letter)
            if j == 0:
                ax = fig.add_subplot(gs[4 + i + 1, 1:3])
            elif j == 1:
                ax = fig.add_subplot(gs[4 + i + 1, 5:7])
            elif j == 2:
                ax = fig.add_subplot(gs[4 + i + 1, 8])
            title_letter = f'({letters[i+0]}{7+j*2})'
            title_letter = f'({letters[6+j*2]}{i+1})'
            #plot_xs_weight(ax, xlabel, ylabel, A_reduced, m, weight_list, xs_group, colors[i], linewidth, alpha, color_bias, title_letter, xs_multi, A_unit, group_index)
            plot_ygl_weight(ax, xlabel, ylabel, A_reduced, m, weight_list, xs_group, colors[i], linewidth, alpha, color_bias, title_letter, xs_multi, A_unit, group_index)

    # Middle header row: equation of the m-dimensional reduction and a brace.
    ax = fig.add_subplot(gs[4, :])
    ax.set_axis_off()
    ax.annotate('m-dimensional Reduction', xy=(0.7, 0.5), xytext=(0.22, 0.5), xycoords='axes fraction', fontsize=14, color='tab:grey', weight='bold')
    dxdt = r'$\frac{dy^{(a)}}{dt} = F(x_i) + w \sum_{b=1}^{m} \beta_{ab} G(y^{(a)}, y^{(b)})$'
    t = ax.text(0.63, 0.58, dxdt, ha="center", va="center", rotation=0, size=15, bbox=dict(boxstyle="round,pad=0.3", fc="tab:grey", ec="k", lw=1, alpha=0.5))
    draw_brace(ax, (0.12 * ax.get_xlim()[1], 0.8 * ax.get_xlim()[1]), 'tab:grey', 3, '')
    plt.subplots_adjust(left=0.05, right=0.95, wspace=0.25, hspace=0.70, bottom=0.05, top=0.95)
fig = plt.figure(figsize=(15, 8)) nx, ny = 2, 3 for i in range(5): ax = fig.add_subplot(nx, ny, i + 1) ax.set_title(allnames[i]) bins = 10 binrange_min = min(alldata_LA[i].min(), alldata_ORI[i].min()) binrange_max = max(alldata_LA[i].max(), alldata_ORI[i].max()) sns.histplot(alldata_LA[i], ax=ax, bins=bins, binrange=(binrange_min, binrange_max), common_bins=True, kde=False, label="Look ahead", color="orange", alpha=0.3) sns.histplot(alldata_ORI[i], ax=ax, bins=bins, binrange=(binrange_min, binrange_max), common_bins=True, kde=False, label="Original", color="blue", alpha=0.2) ax.axvline(x=alldata_LA[i].mean(), label="means",
# Number of records per city, drawn as horizontal bars. The counts are
# computed once and reused for both axes.
city_counts = df['City'].value_counts()
sns.barplot(x=city_counts.values, y=city_counts.index)
plt.title('Population per city')
plt.xlabel('Counts')
plt.ylabel('Cities')

# Illness split within each gender.
plt.figure(figsize=(10, 5))
sns.countplot(x="Gender", hue="Illness", palette="rocket", data=df)

# Age histogram, one facet per Illness value.
g = sns.FacetGrid(df, col='Illness', height=5)
g = g.map(sns.histplot, "Age")

# Gender split within each city.
plt.figure(figsize=(10, 5))
sns.countplot(x="City", hue="Gender", palette="rocket", data=df)

plt.figure(figsize=(10, 5))
sns.histplot(df["Age"], color='r')
plt.title("Age distribution")

# `sns.distplot` is deprecated; this is the documented `histplot`
# equivalent (density-scaled histogram with a KDE curve on top).
plt.figure(figsize=(10, 5))
sns.histplot(df["Income"], color='g', kde=True, stat="density",
             kde_kws=dict(cut=3), alpha=.4, edgecolor=(1, 1, 1, .4))
plt.title("Income distribution")

# Income of men and women overlaid on the same axes.
fig = plt.figure(figsize=(10, 5))
sns.histplot(df[df["Gender"] == "Male"]["Income"], color='b')
sns.histplot(df[df["Gender"] == "Female"]["Income"], color='r')
fig.legend(labels=['Male', 'Female'])
plt.title("Income distribution - Man and Woman")

cities = ['Dallas', 'New York City', 'Los Angeles', 'Mountain View', 'Boston', 'Washington D.C.', 'Austin', 'San Diego']
colors = ['orange', 'red', 'blue', 'teal', 'brown', 'turquoise', 'olive', 'plum']
fig = plt.figure(figsize=(10, 5))
def plot_distribution(
    dist,
    dist_str: str,
    agg_df: pd.DataFrame,
    circle_group: str,
    area_group: str,
    param: str,
    reference_value_dict: Dict[str, float],
    ax,
    legend: bool,
    area_bin_col: str,
    circle_count_col: str,
):
    """
    Plot beta distribution of circle count and total area grouped for param.

    The rows of ``agg_df`` that belong to both ``circle_group`` and
    ``area_group`` are selected and ``dist`` is fitted to their ``param``
    values. The fitted PDF, the central 75 %, 50 % and 25 % intervals, the
    reference value, a density histogram of the data and a text box with the
    fit parameters and a Kolmogorov-Smirnov test are drawn on ``ax``.

    :param dist: Distribution object providing ``fit`` and, when called with
        ``(a, b, loc=..., scale=...)``, a frozen distribution.
    :param dist_str: Distribution name passed to ``stats.kstest``.
    :param agg_df: Table holding ``param`` and the two grouping columns.
    :param circle_group: Value to match in ``circle_count_col``.
    :param area_group: Value to match in ``area_bin_col``; its first and
        third characters are read as digits for the title.
    :param param: Column whose values are fitted and plotted.
    :param reference_value_dict: Reference value for each parameter name.
    :param ax: Matplotlib axes to draw on.
    :param legend: Whether to keep the legend on ``ax``.
    :param area_bin_col: Name of the total-area grouping column.
    :param circle_count_col: Name of the circle-count grouping column.
    :raises ValueError: If no row matches the requested group pair.
    """
    # Locate values with certain circle count and total area group. Both
    # masks are built on agg_df and combined before indexing, so the result
    # does not depend on pandas re-aligning a mask to a filtered frame.
    in_group = (agg_df[circle_count_col] == circle_group) & (
        agg_df[area_bin_col] == area_group
    )
    pair_df = agg_df.loc[in_group]

    # Get parameter values
    values = pair_df[param].values
    if len(values) == 0:
        raise ValueError(
            f"No rows with {circle_count_col} == {circle_group!r} and "
            f"{area_bin_col} == {area_group!r}."
        )
    value_min, value_max = min(values), max(values)

    # Value interval
    delta = abs(value_max - value_min)

    # Fit beta distribution
    a, b, loc, scale = dist.fit(values)
    beta_dist = dist(a, b, loc=loc, scale=scale)

    # Determine x value range
    x = np.linspace(value_min, value_max)

    # Calculate y values for xs
    y = beta_dist.pdf(x)

    # Plot x, y values
    sns.lineplot(
        ax=ax,
        x=x,
        y=y,
        color="black",
        label="Beta Distribution PDF Fit" if legend else None,
        legend=legend,
    )

    # Make a color palette
    colors = sns.color_palette("Reds", n_colors=3)

    # Color generator (infinite)
    color_generator = colorgen(colors)

    # Choose the probability thresholds to plot, widest interval first
    probs = list(reversed(np.arange(0.25, 1.0, step=0.25)))

    # Iterate over probabilities
    for prob in probs:
        interval = beta_dist.interval(prob)

        # Plot vertical lines at probabilities at interval edges
        prob_color = next(color_generator)
        prob_text = f"{int(prob * 100)} % of iterations."

        # Iterate over the two interval edges; the offset pushes the text
        # away from the line, left for the lower edge and right for the upper
        for xloc, xoff in zip(interval, (-1, 1)):
            interval_text = f"${round(xloc, 2)}$"

            # Plot vertical line at interval edge. Only the lower edge gets
            # a label so each probability appears once in the legend.
            ax.vlines(
                xloc,
                ymin=0,
                ymax=beta_dist.pdf(xloc),
                color=prob_color,
                label=prob_text if xloc != interval[-1] else None,
            )

            # Plot the interval edge value as text
            ax.text(
                x=xloc + (delta * 0.02 * xoff),
                y=beta_dist.pdf(xloc) / 2.25,
                s=interval_text,
                rotation=90,
                ha="center",
                fontstyle="italic",
                va="center",
                fontsize=8,
            )

    # Plot reference value
    ax.axvline(reference_value_dict[param], linestyle="dashed", color="black")

    # Annotate the reference value
    ax.annotate(
        text="Reference value",
        xy=(reference_value_dict[param], max(y) * 1.02),
        xytext=(reference_value_dict[param] - 0.4 * delta, max(y) * 1.03),
        arrowprops={"arrowstyle": "->"},
    )

    # Remove top and right spines
    sns.despine(top=True, right=True)

    # Set x and y labels
    ax.set_ylabel("Probability Density Function (PDF)")
    ax.set_xlabel(param)

    # Plot the background histplot of true values
    sns.histplot(
        ax=ax,
        x=values,
        stat="density",
        alpha=0.1,
        edgecolor=None,
        color="black",
        label=f"{utils.param_renamer(param)} Histogram",
    )

    # Set legend for plot
    if legend:
        ax.legend(edgecolor="black", loc="upper right")
    else:
        ax.legend().remove()

    def dist_param_str(value: float, name: str):
        """
        Make string repr from param value.
        """
        return f"${name} = {round(value, 3)}$"

    # Kolmogorov-Smirnov test of the data against the fitted distribution
    kstest_result = stats.kstest(values, dist_str, args=(a, b, loc, scale))
    statistic = kstest_result[0]
    pvalue = kstest_result[1]

    # Collect some distribution parameters into multi-line-string
    vals = (
        a,
        b,
        beta_dist.median(),
        beta_dist.std(),
        beta_dist.var(),
        statistic,
        pvalue,
    )
    names = (
        r"\alpha",
        r"\beta",
        "median",
        "std",
        "var",
        r"KS\ statistic",
        r"KS\ pvalue",
    )
    assert len(vals) == len(names)
    param_text = "Beta Distribution\n"
    for val, name in zip(vals, names):
        param_text += dist_param_str(val, name)
        param_text += "\n" if name != names[-1] else ""

    # Plot the collected text
    ax.text(
        0.1,
        0.25,
        s=param_text,
        ha="center",
        ma="right",
        fontsize=8,
        transform=ax.transAxes,
    )

    # Figure title
    circle_group_text = circle_group.replace("-", " to ")
    ax.set_title(
        f"Subsampling iterations with circle count from {circle_group_text}"
        " and total area between "
        f"{int(area_group[0])*1000}-{int(area_group[2])*1000} $m^2$."
    )

    # Set x scale
    ax.set_xlim(value_min - 0.25 * delta, 0.9 * value_max + 0.5 * delta)

    # Set y scale
    ax.set_ylim(0, max(y) * 1.2)

    # Set param name nicely
    ax.set_xlabel(utils.param_renamer(param))
def plot_distribution(df, col_name, vlabel, name_file, close_file=True):
    """Save a three-panel summary figure (boxplot, histogram, time series).

    The column ``col_name`` of ``df`` is taken without missing values and
    ordered by index.  The figure has a boxplot (top left), a probability
    histogram (top right) and a line plot against the index (bottom row),
    and is written to ``"Output/Plot/" + name_file``.

    Args:
        df: DataFrame holding the column to plot.
        col_name: Column to plot; its upper-cased name is the figure title.
        vlabel: Axis label used for the plotted values in all three panels.
        name_file: File name appended to the ``Output/Plot/`` directory.
        close_file: When equal to ``True`` the figure is closed after saving.
    """
    series = df[col_name].dropna().sort_index()
    top = series.max()

    # round_down / round_up are defined elsewhere in this file; presumably
    # the -1 argument rounds to the nearest ten -- confirm there.
    vmin = round_down(series.min(), -1)
    vmax = round_up(top, -1) if top < 10 else round_up(top * 1.1, -1)
    # Padding added around the value range: one tenth of its width.
    vstep = (vmax - vmin) * 0.1

    fig = plt.figure(
        constrained_layout=True,
        figsize=(8, 6),
        facecolor="lightgray",
    )
    fig.suptitle(col_name.upper(), fontweight='bold')
    grid = GridSpec(
        2, 2,
        figure=fig,
        left=0.1, right=0.85, top=0.950, bottom=0.1,
        hspace=0.0125, height_ratios=[2, 1],
        wspace=0.005, width_ratios=[1, 2],
    )
    ax_histogram = fig.add_subplot(grid[0, 1])
    ax_boxplot = fig.add_subplot(grid[0, 0])
    ax_timeseries = fig.add_subplot(grid[1, :])

    # --- Boxplot: whiskers span the full data range, raw points overlaid.
    sns.boxplot(
        data=series, whis=[0, 100], orient="v", color='lightblue',
        linewidth=1, saturation=1, zorder=3, ax=ax_boxplot,
    )
    sns.stripplot(
        data=series, size=2.5, orient="v", color=".3", linewidth=0,
        ax=ax_boxplot,
    )
    ax_boxplot.set_ylim(vmin - vstep, vmax + vstep)
    ax_boxplot.set_xticklabels("")
    ax_boxplot.set_ylabel(vlabel)
    ax_boxplot.grid(axis="y", ls="--", lw=0.75, zorder=2)
    ax_boxplot.set_axisbelow(True)
    ax_boxplot.set_title("Boxplot")
    ax_boxplot.xaxis.set_ticks_position('none')

    # --- Histogram: bin count is 1 + 3.322*log10(n) for n >= 100,
    # otherwise sqrt(n), rounded to an integer.
    n_obs = series.count()
    raw_bins = 1 + 3.322 * np.log10(n_obs) if n_obs >= 100 else np.sqrt(n_obs)
    kbins = np.round(raw_bins).astype(int)
    sns.histplot(
        series, stat="probability", color='lightblue', bins=kbins,
        binrange=(vmin, vmax), zorder=3, ax=ax_histogram,
    )
    ax_histogram.set_xlim(vmin - vstep, vmax + vstep)
    ax_histogram.set_ylim(0, 1)
    ax_histogram.grid(ls="--", lw=0.75, zorder=1)
    ax_histogram.set_ylabel("Probabilidad (%)")
    ax_histogram.set_xlabel(vlabel)
    ax_histogram.set_axisbelow(True)
    # Freeze the current tick positions, then relabel them as percentages.
    y_ticks = ax_histogram.get_yticks()
    ax_histogram.yaxis.set_major_locator(mticker.FixedLocator(y_ticks))
    percent_labels = mticker.FormatStrFormatter('%.0f').format_ticks(
        y_ticks * 100)
    ax_histogram.set_yticklabels(percent_labels)
    ax_histogram.set_title("Histograma")

    # --- Time series: values against the series index.
    ax_timeseries.plot(series, label="Datos", c='k', lw=1)
    ax_timeseries.set_xlim(series.index.min(), series.index.max())
    ax_timeseries.set_ylim(vmin, vmax)
    ax_timeseries.grid(axis="both", ls="--", lw=0.75, zorder=2)
    ax_timeseries.set_ylabel(vlabel)
    ax_timeseries.set_xlabel("Tiempo [años]")
    ax_timeseries.set_title("Serie de Tiempo")
    ax_timeseries.legend(loc=0, ncol=2)

    fig.savefig("Output/Plot/" + name_file)
    if close_file == True:
        plt.close(fig)
# For every mouse, find the session with the highest 'correct' count and
# record that session's 'incorrect' count, the count itself and its day.
for mouse in mouse_ids:
    mouse_results = results[results['mouse_id'] == mouse]
    max_reward = mouse_results['correct'].max()
    best_day = mouse_results[mouse_results['correct'] == max_reward]
    # If several sessions tie for the maximum, the first matching row is used.
    incorrects.append(best_day['incorrect'].values[0])
    max_rewards.append(max_reward)
    best_days.append(best_day['day'].values[0])

# Get trial data from best days
trials = pd.DataFrame(cohort.get_trials())
# Latency is the difference between each trial's 'end' and 'start' columns.
trials["grasp_latency"] = trials.end - trials.start
latencies = []
for i, mouse in enumerate(mouse_ids):
    mouse_trials = trials[trials['mouse_id'] == mouse]
    # best_days[i] is read by position, so best_days must line up with
    # mouse_ids -- NOTE(review): this holds only if best_days was empty
    # before the loop above; confirm where it is initialised.
    day_trials = mouse_trials[mouse_trials['day'] == best_days[i]]
    corrects = day_trials[day_trials['outcome'] == Outcomes.CORRECT]
    latencies.append(corrects["grasp_latency"])

# One column of latencies per mouse, keyed by mouse id.
all_latencies = pd.concat(latencies, axis=1, keys=mouse_ids)

# Four views of the same data on a 2x2 grid of axes.
_, axes = plt.subplots(2, 2)
sns.histplot(data=all_latencies, ax=axes[0][0])
sns.boxplot(data=all_latencies, ax=axes[0][1])
sns.violinplot(data=all_latencies, ax=axes[1][0])
sns.stripplot(data=all_latencies, ax=axes[1][1])
utils.save(
    "~/duguidlab/visuomotor_control/figures/srf_grant/reach_behaviour_grasp_latency.pdf"
)
#%% import seaborn as sns sns.set_theme() sns.set(rc={'figure.figsize': (11.7, 8.27)}) import pandas as pd #%% df = pd.read_csv('gamespot_reviews.csv') df.info() #%% sns.histplot(df, x='score', bins=20) import numpy as np score_mean = df['score'].mean() score_median = df['score'].median() print(score_mean) print(score_median) #%% # pd_df = df.sort_values(['score']).reset_index(drop=True) # print (pd_df) #%% sns.countplot(y='genre', data=df, order=df['genre'].value_counts().index) #%% sns.boxplot(data=df, x='score', y='genre', order=df['genre'].value_counts().index) #%%
# Train result visualization plt.scatter(y_train, y_train_pred) plt.xlabel('Actual Price') plt.ylabel('Predicted Price') plt.title('Train Result Scatter') plt.grid() plt.show() # Test result visualization plt.scatter(y_test, y_test_pred) plt.xlabel('Actual Price') plt.ylabel('Predicted Price') plt.title('Test Result Scatter') plt.grid() plt.show() # Cheking residuals plt.scatter(y_test, y_test - y_test_pred, color='r') plt.xlabel('Actual Price') plt.ylabel('Residuals') plt.title('Actual Price VS. Residual') plt.grid() plt.show() # Cheking residuals normality residual = sn.histplot(y_test - y_test_pred, kde=True) residual.set_title('Residuals Histogram') residual.set(xlabel='Residuals', ylabel='Frequency') plt.show()
2. Handling missing values age,cabin.embarked """
# Age
# number of missing values is 3292
# Histogram to detect any skewed distribution: training data in the upper
# panel, test data in the lower panel, each with a KDE curve.
fig, ax = plt.subplots(2, figsize = (7,5))
fig.suptitle("Histogram of Age")
ax[0].set_title('Training Data')
sns.histplot(df_train['Age'], kde=True,bins=20, ax=ax[0])
ax[1].set_title('Test Data')
sns.histplot(df_test['Age'], kde=True,bins=20, ax=ax[1])
fig.tight_layout()
plt.show()

# Boxplot to detect any outlier, using the same two-panel layout.
fig, ax = plt.subplots(2, figsize = (8,8))
fig.suptitle("Boxplot of Age")
ax[0].set_title('Training Data')
sns.boxplot(df_train['Age'], ax=ax[0])
ax[1].set_title('Test Data')
sns.boxplot(df_test['Age'], ax=ax[1])
fig.tight_layout()
plt.show()
def main():
    """Render the Streamlit page of charging-session charts.

    Reads ``laadpaaldataClean.csv``, parses the ``Started`` and ``Ended``
    columns as datetimes and draws, in order:

    1. a histogram of a user-selected column (optionally restricted to a
       weekday range when ``Weekday`` is selected),
    2. a histogram with KDE and a boxplot of ``ChargeTime``, with the mean
       and median marked,
    3. a 2-D histogram of ``ChargeTime`` against ``ConnectedTime``,
    4. a scatter plot of a user-selected column over a selectable time range.
    """
    laadpaaldata = pd.read_csv("laadpaaldataClean.csv", index_col=0)
    laadpaaldata['Started'] = pd.to_datetime(laadpaaldata['Started'],
                                             format='%Y-%m-%d %H:%M:%S')
    laadpaaldata['Ended'] = pd.to_datetime(laadpaaldata['Ended'],
                                           format='%Y-%m-%d %H:%M:%S')

    # st.write(laadpaaldata.describe())
    # ------------------------------------------------------------------
    # 1. Histogram of a selectable column.
    colomns = [
        "TotalEnergy", "ConnectedTime", "ChargeTime", "MaxPower",
        "OverCharged", "Weekday"
    ]
    st.subheader("Wat is de verdeling in vermogens?")
    st.text("Hieronder kunt you de histogram hieronder aanpassen." "")
    option2 = st.selectbox('Selecteer uw column voor de histogram?', (colomns))

    if option2 == "Weekday":
        week = [
            'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
            'Saturday', 'Sunday'
        ]
        # Maps the weekday names shown on the slider to the integer codes
        # compared against the 'Weekday' column below.
        optionsdict = {
            'Monday': 0,
            'Tuesday': 1,
            'Wednesday': 2,
            'Thursday': 3,
            'Friday': 4,
            'Saturday': 5,
            'Sunday': 6
        }
        week1, week2 = st.select_slider(
            'Selecteer een speciafieke weekdag (0=Maandag)',
            options=week,
            value=('Monday', 'Sunday'))
        week3 = optionsdict[week1]
        week4 = optionsdict[week2]
        # st.write(week3, week4)
        # st.dataframe(laadpaaldata)
        # Keep only sessions whose weekday lies in the selected range
        # (both ends inclusive).
        df2 = laadpaaldata.loc[((laadpaaldata['Weekday'] >= week3) &
                                (laadpaaldata['Weekday'] <= week4))]
        # st.dataframe(df2)
        # NOTE(review): option2 is passed as the second positional argument
        # of st.subheader; confirm that this is the intended parameter.
        st.subheader("Histogram", option2)
        sns.histplot(data=df2, x=option2, bins="auto")
        st.pyplot()
    else:
        df2 = laadpaaldata
        st.subheader("Histogram")
        sns.histplot(data=df2, x=option2, bins="auto")
        st.pyplot()

    # ------------------------------------------------------------------
    # 2. ChargeTime histogram (with KDE) and boxplot.
    st.subheader(
        "Een histogram van de laadtijd met de bijhorende boxplot.\n"
        "Dit in combinatie meteen annotatie van het gemiddelde en de median en een benadering van de kansdichtheidsfunctie."
        "")
    sns.histplot(data=df2, x="ChargeTime", bins="auto", kde=True)
    # NOTE(review): the histogram uses df2 (possibly weekday-filtered) while
    # the mean/median lines use the full laadpaaldata; confirm this is wanted.
    plt.axvline(x=laadpaaldata.ChargeTime.mean(),
                linewidth=1,
                color='r',
                label="mean",
                alpha=0.5)
    plt.axvline(x=laadpaaldata.ChargeTime.median(),
                linewidth=1,
                color='g',
                label="median",
                alpha=0.5)
    # Build the legend from the label= values set on the two lines above.
    # A positional list of labels is matched to artists in drawing order,
    # which would attach "mean"/"median" to the KDE curve and the mean line.
    plt.legend()
    st.pyplot()
    sns.boxplot(data=df2, x="ChargeTime")
    st.pyplot()

    # ------------------------------------------------------------------
    # 3. 2-D histogram of ChargeTime against ConnectedTime.
    sns.histplot(data=laadpaaldata,
                 x="ChargeTime",
                 y="ConnectedTime",
                 bins=40,
                 cbar=True,
                 cbar_kws=dict(shrink=.75))
    plt.axvline(x=laadpaaldata.ChargeTime.mean(),
                linewidth=1,
                color='g',
                label="mean",
                alpha=0.5)
    plt.axvline(x=laadpaaldata.ChargeTime.median(),
                linewidth=1,
                color='y',
                label="median",
                alpha=0.5)
    # Same label-based legend as above, for consistency.
    plt.legend()
    st.pyplot()

    # ------------------------------------------------------------------
    # 4. Scatter plot over time.
    st.subheader(
        "Een scatterplot over tijd.\n"
        "Gebruik de tijdslider en de dropdown menu om een column te selecteren"
    )
    colomns = ["TotalEnergy", "ConnectedTime", "ChargeTime", "MaxPower"]
    col_one_list = laadpaaldata['Started'].tolist()
    start_date = laadpaaldata['Started'].iloc[0]
    end_date = laadpaaldata['Started'].iloc[-1]
    option3 = st.selectbox('Selecteer uw column voor de y-as', (colomns))
    st.write('You selected:', option3)
    start_slider, end_slider = st.select_slider(
        'Select a range for dates created',
        options=col_one_list,
        value=(start_date, end_date))
    st.write('Your selected time between', start_slider, 'and', end_slider)
    # Strict comparisons: sessions starting exactly at start_slider or ending
    # exactly at end_slider are excluded.
    df = laadpaaldata.loc[((laadpaaldata['Started'] > start_slider) &
                           (laadpaaldata['Ended'] < end_slider))]
    sns.scatterplot(data=df, x="Started", y=option3)
    plt.xticks(rotation=45)
    st.pyplot()
print(df2.tail())

# Box plot of percent_change per parameter, one box per Site.
ax = sns.boxplot(x="param",
                 y="percent_change",
                 hue="Site",
                 data=df,
                 palette="Set1",
                 width=0.5)
ax.set_xlabel("Parameter")
# Raw string: "\%" is not a valid escape sequence in a normal string literal
# and triggers a SyntaxWarning on recent Python versions. The runtime text is
# unchanged.
ax.set_ylabel(r"Sensitivity of Storage Efficiency [$\%$]")
plt.savefig("data/paper/sensitivities.jpg", bbox_inches="tight", dpi=300)
# Clear the current figure so the next plot starts from an empty canvas.
plt.clf()

# Step histogram of the 'frozen' column of df2, one outline per Site.
ax = sns.histplot(df2,
                  x="frozen",
                  hue="Site",
                  palette="Set1",
                  element="step",
                  fill=False)
ax.set_ylabel("Discharge duration [ $hours$ ]")
ax.set_xlabel("Freezing rate [ $l\\, min^{-1}$ ]")
plt.savefig("data/paper/freeze_rate.jpg", bbox_inches="tight", dpi=300)
plt.clf()

# Step histogram of the 'melted' column of df3, one outline per Site.
ax = sns.histplot(df3,
                  x="melted",
                  hue="Site",
                  palette="Set1",
                  element="step",
                  fill=False)
ax.set_ylabel("Discharge duration [ $hours$ ]")
ax.set_xlabel("Melting rate [ $l\\, min^{-1}$ ]")
'MGLU3.SA': 'MGLU', 'TOTS3.SA': 'TOTS', 'BOVA11.SA': 'BOVA' } acoes_df.rename(columns=rename_cols, inplace=True) # Check for null acoes_df.isnull().sum() acoes_df.dropna(inplace=True) # Save to csv acoes_df.to_csv('acoes.csv') # Graficos histograma simples sns.histplot(acoes_df['GOL']) # Gráfico de todas as acoes plt.figure(figsize=(10, 50)) i = 1 for i in np.arange(1, len(acoes_df.columns)): plt.subplot(7, 1, i + 1) sns.histplot(acoes_df[acoes_df.columns[i]], bins=25, kde=True) plt.title(acoes_df.columns[i]) # Gráfico de boxplot sns.boxplot(x=acoes_df['GOL']) plt.figure(figsize=(10, 50)) i = 1 for i in np.arange(1, len(acoes_df.columns)):
# Zmienne kategoryczne: # - zdedydowana większość nieruchomości dotyczy nieruchomości nie będących częścią zamkniętego osiedla # - nieruchmości posiadające balkon stanowią 60 % # - blisko 500 nieruchomości posiada dostęp do ogródka # - żadna z ofert nie zawierała informacji o dostępie do garażu / miejsca postojowego - być może jest to wynikiem błędu w procesie pobierania danych ze strony # - ponad 60 % nieruchomości nie znajduje się w budynku, który jest wyposażony w windę # - występowanie informacji o przynależności piwnicy stanowi 50 % ogłoszeń # - blisko 25 % nieruchomości nie posiada monitoringu czy ochrony for feature in [ "powierzchnia", "cena", "cena_metr", "czas_auto", "czas_zbiorowy", "dystans_auto", "dystans_zbiorowy" ]: fig = plt.figure(figsize=(16, 8)) sns.histplot(data=df_with_localisation_cleaned, x=feature) plt.plot() fig = plt.figure(figsize=(16, 8)) sns.boxplot(data=df_with_localisation_cleaned, y=feature) plt.plot() # Dzięki wykresom boxplot można zauważyć obserwacje odstające w skali całego zbioru. Są to nieruchomości o powierzchni powyżej 120 metrów kwadratowych i cenie za metr 20000. Analizując wykres boxplot oraz histogram dla zmiennej cena, można zauważyć kilka bardzo wysokich ofert - w tym oferta z ceną 16 milionów złotych. for category in [ "rynek", "ogrzewanie", "winda", "balkon", "ogrodek", "piwnica", "monitoring_ochrona", "stan_wykonczenia", "teren_zamkniety" ]: fig = plt.figure(figsize=(16, 8)) sns.boxplot(y=df_with_localisation_cleaned["cena_metr"], x=df_with_localisation_cleaned[category]) plt.plot()
"count": vencedor.values }) sns.barplot(x="winner", y="count", data=df_vencedor).set_title("Distribuição dos ganhadores por lado") categoria = df.groupby('weight_class')['weight_class'].count().sort_values( ascending=False)[0:5] df_categoria = pd.DataFrame({ 'weight_class': categoria.index, "count": categoria.values }) sns.barplot( x='weight_class', y="count", data=df_categoria).set_title("Distribuição dos lutadores por categoria") sns.histplot(df["no_of_rounds"]).set_title('Distribuição das lutas por rounds') vermelhos = df[df["Winner"] == "Red"]["R_fighter"] azuis = df[df["Winner"] == "Blue"]["B_fighter"] df_vermelho = pd.DataFrame({ "count": vermelhos.index, "winner": vermelhos.values }) df_azul = pd.DataFrame({"count": azuis.index, "winner": azuis.values}) winners = pd.concat([df_azul, df_vermelho]) sns.barplot( x="winner", y="count", data=winners.head(5)).set_title("Cinco jogadores com menos vitórias") df_vermelhos = pd.DataFrame({ "count": vermelhos.index,
""" n = len(arr) # sample sizes s2 = np.var(arr, ddof=1) # sample variance df = n - 1 # degrees of freedom upper = (n - 1) * s2 / stats.chi2.ppf((1 - gamma) / 2, df) lower = (n - 1) * s2 / stats.chi2.ppf(1 - (1 - gamma) / 2, df) return lower, upper if __name__ == '__main__': population = stats.norm.rvs(loc=0.0, scale=1.0, size=1000000) for idx, sample_size in enumerate(N): sample = np.random.choice(a=population, size=sample_size) sns.histplot(x=sample, kde=True, color='orange') plt.savefig(f'../lab1/output/images/output_task1_{idx}.png', bbox_inches='tight') plt.close() with open('output/output_task1.txt', 'a+') as txt: txt.write( f"Sample_size = {sample_size}: " f"Mean = {np.mean(sample)}, Variance = {np.std(sample, ddof=1)}\n" f"A. {task_a(sample)}\n" f"B. {task_b(sample)}\n" f"C. {task_c(sample)}\n\n")
# determine feature importances clf = sklearn.ensemble.RandomForestClassifier(n_estimators=100, n_jobs=args.n_jobs) clf.fit(X, y) scores = clf.feature_importances_ # use a percentile threshold if specified if args.threshold != -1: threshold = np.percentile(scores, args.threshold) # otherwise compute threshold automatically else: threshold = compute_threshold(genes, scores) # select candidate genes candidate_genes = [gene for i, gene in enumerate(genes) if scores[i] > threshold] # plot distribution of gene scores if args.visualize: sns.histplot(scores, kde=True) ymin, ymax = plt.gca().get_ylim() y = [ymin, ymax / 2] plt.plot([threshold, threshold], y, 'r') plt.title(name) plt.tight_layout() plt.savefig('%s/%s-rf-candidate-threshold.png' % (args.output_dir, name)) plt.close() # save results to output file outfile.write('\t'.join([name] + candidate_genes) + '\n')
def _plot_prior_posterior(self, prior_sample, posterior_sample, label):
    """Overlay histograms (with KDE) of a prior and a posterior sample.

    Both samples are stacked into one long-form frame with a ``type``
    column that tells them apart, then drawn with one hue per type.
    ``label`` is used as the y-axis label; the x-axis label is left empty.
    """
    labelled_samples = (
        (prior_sample, 'prior'),
        (posterior_sample, 'posterior'),
    )
    frames = [
        pd.DataFrame({'value': sample, 'type': kind})
        for sample, kind in labelled_samples
    ]
    long_df = pd.concat(frames)

    axes = sns.histplot(data=long_df, x='value', hue='type', kde=True)
    axes.set(xlabel='', ylabel=label)
# NOTE(review): `dict` is indexed by question number here, so it is presumably
# a lookup defined earlier in the notebook that shadows the builtin -- confirm.
dict[14], df[14]
"""<a name='a'></a> # 3. Fixed Entry Feature Investigation Picking columns with multiple choice / yes-no answers to compare with memory results ENTER QUESTION NUMBER HERE """
# Question (column) number under investigation.
k = 17 #-------------
dict[k]
# Distribution of the answers to question k.
sns.histplot(df[k])
"""ENTER CUTOFF HERE"""
cutoff1 = 1
"""-------------------------------------"""
#df[[k, 30, 31, 32, 33]]
# Mean of columns 30-33 for each distinct answer to question k.
df[[k, 30, 31, 32, 33]].groupby([k]).mean()
# Number of rows with a value in column 30 for each distinct answer.
df[[k, 30]].groupby([k]).count()
# open('%s_N%i.pickle' % (dataset, N), 'wb')) # Then reload like this # dp = pickle.load(file_name) # locals().update(dp) # plots ####### savefigs = True # do you want to save figures as pdfs plt.style.use('ggplot') pal = sb.dark_palette('white', n_colors=2) # Compare standard and path sampling estimates of the log-normalising cst plt.figure() diff_est = [(r['out'].logLts[-1] - r['path_sampling']) for r in results if r['type'] == 'tempering'] sb.histplot(diff_est) # Figure 17.1: typical behaviour of IBIS typ_ibis = [r for r in results if r['type'] == 'ibis' and r['K'] == typK][0] typ_ess = typ_ibis['out'].ESSs typ_rs_times = np.nonzero(typ_ibis['out'].rs_flags)[0] # Left panel: evolution of ESS fig, ax = plt.subplots() ax.plot(typ_ess, 'k') ax.set(xlabel=r'$t$', ylabel='ESS') if savefigs: plt.savefig(dataset_name + '_typical_ibis_ess.pdf') # Right panel: evolution of resampling times fig, ax = plt.subplots()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

data = pd.read_csv('input_data.csv')
td = data[' Total Discharges ']

#1-(a) distribution of total discharges, drawn two ways
sb.distplot(td)
plt.show()
sb.histplot(td)
plt.show()

# The three amount columns are read as text. Drop the first character of
# every entry (presumably a currency symbol -- confirm against the CSV) and
# convert the columns to float. The surrounding spaces in the first two
# column names are part of the names.
for amount_column in (' Average Covered Charges ',
                      ' Average Total Payments ',
                      'Average Medicare Payments'):
    data[amount_column] = data[amount_column].apply(lambda x: x[1:])
data = data.astype({
    ' Average Covered Charges ': 'float',
    ' Average Total Payments ': 'float',
    'Average Medicare Payments': 'float',
})

#1-(b) distribution of average covered charges, drawn two ways
sb.distplot(data[' Average Covered Charges '])
plt.show()
sb.histplot(data[' Average Covered Charges '])
plt.show()

#1-(c) total payments against Medicare payments
plt.scatter(data[' Average Total Payments '],
            data['Average Medicare Payments'])
plt.xlabel('Average Total Payments')
plt.ylabel('Average Medicare Payments')
plt.show()

#1-(d)
Sofa_score # In[ ]: sofa_score['SOFA'] = sofa_score.sofa sofa_score.drop('sofa',axis =1, inplace = True) sofa_score.head() # In[ ]: plt.figure(figsize=(10,10)) ax = sns.histplot(x= 'SOFA' , data=sofa_score) ax.set_title('Histogram Plot For Sofa Score') # In[ ]: df_expls = pd.read_sql(query_schema+'select * from explicit_sepsis', con) df_expls = df_expls.groupby('subject_id')[['severe_sepsis', 'septic_shock', 'sepsis']].max() df_expls.sum() # In[ ]:
def histogram_unweighted_team_compositions(team_sizes):
    """Show two histograms describing team composition.

    The first plots the ``twers`` column of ``team_sizes`` (converted to
    int); the second plots the per-team ratio of ``nontwers`` to ``twers``.
    Each histogram is shown with ``plt.show()`` before the next is drawn.
    """
    tw_counts = team_sizes['twers'].apply(int)
    tw_axes = sns.histplot(data=tw_counts)
    tw_axes.set(
        xlabel='number of TW coders on the team',
        ylabel='number of teams',
    )
    plt.show()

    non_tw_counts = team_sizes['nontwers'].apply(int)
    ratio = non_tw_counts / team_sizes['twers'].apply(int)
    ratio_axes = sns.histplot(data=ratio)
    ratio_axes.set(
        xlabel='ratio of nonthoughtworks coders to TW coders (nonTWers/TWers)',
        ylabel='number of teams',
    )
    plt.show()
# First continuous feature against the target, split in turn by each of the
# listed nominal features.
for nominal_position in (2, 1, 3, 4, 5, 6):
    sns.boxplot(
        x=data['target'],
        y=data[quantitative['cont'][0]],
        hue=data[qualitative['nominal'][nominal_position]],
    )

# First discrete feature against the target, split by nominal feature 2.
sns.boxplot(
    x=data['target'],
    y=data[quantitative['discrete'][0]],
    hue=data[qualitative['nominal'][2]],
)

# Distribution of the first discrete feature: cumulative probability polygon
# first, then plain counts, both with bins of width 10.
sns.histplot(
    data[quantitative['discrete'][0]],
    binwidth=10,
    cumulative=True,
    element='poly',
    alpha=0.3,
    stat='probability',
)
sns.histplot(
    data[quantitative['discrete'][0]],
    binwidth=10,
    cumulative=False,
    stat='count',
)


# Mine relations between various quantitative features visually
def get_index(feature):
    """Return the distinct values of *feature*, as ordered by value_counts()."""
    return feature.value_counts().index


# Count plots of nominal features 1 to 4.
for nominal_position in (1, 2, 3, 4):
    sns.countplot(data[qualitative['nominal'][nominal_position]])