Esempio n. 1
0
def _elo_band_conditions(elo_column):
    """Return the boolean masks that split an Elo column into rating bands.

    The masks are ordered from the highest band (``>= 2700``) down to the
    lowest (``0 <= elo < 1200``) so they line up one-to-one with the list of
    band labels passed to ``np.select``.
    """
    bounds = [2700, 2500, 2400, 2300, 2200, 2000, 1800, 1600, 1400, 1200, 0]
    # Top band is open-ended; every other band is [lower, upper).
    conditions = [elo_column >= bounds[0]]
    conditions.extend(
        (elo_column < upper) & (elo_column >= lower)
        for upper, lower in zip(bounds, bounds[1:])
    )
    return conditions


def chess_analysis():
    """Download the Kaggle chess-games dataset, clean it and plot summaries.

    The function authenticates against the Kaggle API, downloads
    ``arevel/chess-games``, loads ``chess_games.csv`` from the downloaded zip
    archive, removes duplicate White/Black pairings and games whose move text
    contains curly braces, derives Elo-band, outcome and per-move columns, and
    finally shows three groups of seaborn plots for each game type. The total
    run time is printed at the end. Nothing is returned.
    """
    # Start time count to gauge process run time
    start = time.time()
    api = KaggleApi()
    api.authenticate()

    # downloading datasets for Chess games
    api.dataset_download_files('arevel/chess-games')

    # Read data in chunks of 100000 rows and concatenate into one dataframe.
    # The chunks are read lazily, so the concatenation has to happen while the
    # archive member is still open; the context managers then close both the
    # member stream and the archive.
    with zipfile.ZipFile('chess-games.zip') as zf:
        with zf.open('chess_games.csv') as csv_file:
            chess_df = pd.concat(pd.read_csv(csv_file, chunksize=100000))

    # Remove any duplicate user names to limit data to one game per user
    chess_df = chess_df.drop_duplicates(subset=['White', 'Black'])

    # remove any rows with stockfish evaluation as this clogs up the data at a later stage
    chess_df = chess_df.drop(chess_df[chess_df.AN.str.contains(r'[{}]')].index)

    # use iterrows to print out data
    for index, row in chess_df.head(1000).iterrows():
        print(index, row)

    # reset index after dropping duplicate users and removing stockfish evaluations
    chess_df = chess_df.reset_index()

    # Define average elo rank per game
    chess_df['AverageElo'] = (chess_df['WhiteElo'] + chess_df['BlackElo']) / 2

    # create lists of conditions to use for np.select to add new columns that
    # turn numeric values into grouped categories. All three Elo columns share
    # the same bands (the White list previously used "> 2700" for the top band,
    # which left a rating of exactly 2700 without any band).
    white_conditions = _elo_band_conditions(chess_df['WhiteElo'])
    black_conditions = _elo_band_conditions(chess_df['BlackElo'])
    average_conditions = _elo_band_conditions(chess_df['AverageElo'])

    outcome_conditions = [
        chess_df['Result'] == "1-0",
        chess_df['Result'] == "0-1",
        chess_df['Result'] == "1/2-1/2",
        chess_df['Result'] == "*",
    ]

    # create a list of the values to assign for each condition
    elo = [
        'Super GM', 'GM', 'GM/IM', 'FM/IM', 'CM/NM', 'Experts', 'Class A',
        'Class B', 'Class C', 'Class D', 'Novices'
    ]
    outcome = ['White Wins', 'Black Wins', 'Draw', 'No Result']

    # create new columns and use np.select to assign values to it using the lists as arguments.
    # The default is given explicitly as the string '0': the implicit default is
    # the integer 0, which has no common dtype with string choices on recent
    # NumPy releases; older releases turned it into '0', so the result is unchanged.
    chess_df['WhiteEloRank'] = np.select(white_conditions, elo, default='0')
    chess_df['BlackEloRank'] = np.select(black_conditions, elo, default='0')
    chess_df['AverageEloRank'] = np.select(average_conditions, elo, default='0')
    chess_df['Outcome'] = np.select(outcome_conditions, outcome, default='0')

    # create dataframe for moves: split the move text into at most 31 pieces
    # and drop every third piece starting with the first one
    moves_df = chess_df["AN"].str.split(" ", n=30, expand=True)
    moves_df = moves_df.drop(moves_df.iloc[:, 0:31:3], axis=1)

    # append moves dataframe to chess dataframe
    chess_df = pd.concat([chess_df, moves_df], axis=1)
    chess_df.reset_index(inplace=True)

    # sort data from highest average elo to lowest average elo
    chess_df = chess_df.sort_values(by='AverageElo', ascending=False)

    # change data type from object to numeric values
    elo_columns = ["WhiteElo", "BlackElo", "AverageElo"]
    chess_df[elo_columns] = chess_df[elo_columns].apply(pd.to_numeric)

    # Build one dataframe per game type. Each event name is matched in two
    # spellings (with and without a leading space, both with a trailing space)
    # and the two selections are combined with an outer merge.
    event_names = [
        ('Classical', 'Classical'),
        ('Classical Tournament', 'Classical tournament'),
        ('Blitz', 'Blitz'),
        ('Blitz Tournament', 'Blitz tournament'),
        ('Bullet', 'Bullet'),
        ('Bullet Tournament', 'Bullet tournament'),
        ('Correspondence', 'Correspondence'),
    ]
    game_types = [
        (title,
         pd.merge(chess_df[chess_df.Event == f' {event} '],
                  chess_df[chess_df.Event == f'{event} '],
                  how='outer'))
        for title, event in event_names
    ]

    # Plot results
    #  Categorical Data
    plots = ['Termination', 'Outcome', 'AverageEloRank']
    plots_1 = ['AverageElo']
    plots_2 = [1, 2]  # integer column labels coming from the moves dataframe

    for title, games in game_types:
        # One figure row per plot group, so each game type yields three figures.
        for position, column in enumerate(plots, start=1):
            plt.subplot(1, len(plots), position)
            plt.title(title)
            plt.xlabel(column)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1, wspace=0.45)
            sns.countplot(x=games[column])
            plt.xticks(rotation=30)
        plt.show()
        plt.clf()

        for position, column in enumerate(plots_1, start=1):
            plt.subplot(1, len(plots_1), position)
            plt.title(title)
            plt.xlabel(column)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1)
            sns.histplot(x=games[column], kde=True, bins=25)
            plt.xticks(rotation=30)
        plt.show()
        plt.clf()

        for position, column in enumerate(plots_2, start=1):
            plt.subplot(1, len(plots_2), position)
            plt.title(title)
            plt.xlabel(column)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1)
            sns.countplot(x=games[column])
            plt.xticks(rotation=30)
        plt.show()
        plt.clf()

    end = time.time()

    print("Run Time: ", (end - start), 'Seconds')
Esempio n. 2
0
def data_analisys(df):
    """Show a histogram and a box plot of ``df['energy_100g']`` side by side.

    Parameters
    ----------
    df
        Object indexable by the key ``'energy_100g'``.
    """
    # One row, two columns: histogram on the left, box plot on the right.
    _, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(20, 4))
    sn.histplot(data=df['energy_100g'], ax=hist_ax)
    sn.boxplot(data=df['energy_100g'], ax=box_ax)
    plt.show()
def histogram_weighted_enablement_series(enablement_series, weights):
    """Print the weight-expanded series and show its histogram.

    The series is expanded with ``expand_results_by_weights``, printed, cast
    element-wise to ``int`` and drawn with ``sns.histplot``.
    """
    expanded_series = expand_results_by_weights(enablement_series, weights)
    print(expanded_series)
    int_values = [int(value) for value in expanded_series]
    axis = sns.histplot(data=int_values)
    axis.set(xlabel='degree of enablement', ylabel='number of TWers')
    plt.show()
Esempio n. 4
0
    }

    with open(os.path.join(root_mod, 'metrics_ceil_general.json'), 'w') as f:
        json.dump(metrics, f)

    log.info(f'Metrics of the general model (CEIL): {metrics}.')

    # plot the residuals of the general model
    for l in labs:

        fig, ax = plt.subplots(1, 1, figsize=(6, 5))

        sns.histplot(residuals['training'][l],
                     binwidth=1,
                     alpha=0.35,
                     color='tab:blue',
                     label='training',
                     log_scale=(False, True),
                     ax=ax)

        sns.histplot(residuals['validation'][l],
                     binwidth=1,
                     alpha=0.35,
                     color='tab:red',
                     label='validation',
                     log_scale=(False, True),
                     ax=ax)

        sns.histplot(residuals['test'][l],
                     binwidth=1,
                     alpha=0.35,
def plot_single_neuron(data, name: str) -> sns.JointGrid:
    """Draw a distance-vs-collision joint plot for one neuron.

    The joint axes hold two overlaid hexbin plots (axon in green, dendrite in
    orange) of the normalized collision chance (y) against the branch length
    (x). The marginal axes hold the histograms of both variables for both
    neurite types. The figure is saved as a PDF under
    ``results/for_article/fig_coll_agg`` and then shown without blocking.

    Parameters
    ----------
    data : CollisionsDistNaive
        Must expose ``parsed_dend`` and ``parsed_axon`` dataframes with the
        columns ``dist``, ``coll``, ``coll_normed`` and ``neurite``.
    name : str
        The neuron's name (not used inside this function).

    Returns
    -------
    sns.JointGrid
    """
    length_label = "Length of branch [um]"
    normed_label = "Normalized chance for collision"

    neurites = pd.concat([data.parsed_dend, data.parsed_axon],
                         ignore_index=True)
    neurites = neurites.rename(
        columns={
            "dist": length_label,
            "coll": "Chance for collision",
            "coll_normed": normed_label,
        })

    g = sns.JointGrid(height=8)

    axon_rows = neurites.query('neurite == "axon"')
    x_ax = axon_rows[length_label]
    y_ax = axon_rows[normed_label]
    dend_rows = neurites.query('neurite == "dendrite"')
    x_dend = dend_rows[length_label]
    y_dend = dend_rows[normed_label]

    # Both hexbin layers share the extent computed from the axon data.
    extent_ax = (x_ax.min(), x_ax.max(), 0, y_ax.max())
    layers = ((x_ax, y_ax, "Greens"), (x_dend, y_dend, "Oranges"))
    for xs, ys, colormap in layers:
        g.ax_joint.hexbin(
            x=xs,
            y=ys,
            gridsize=30,
            alpha=0.7,
            edgecolors=None,
            cmap=colormap,
            mincnt=1,
            extent=extent_ax,
        )

    # Marginal distributions: axon in C2, dendrite in C1.
    sns.histplot(x=x_ax, alpha=0.5, ax=g.ax_marg_x, color="C2")
    sns.histplot(x=x_dend, alpha=0.5, ax=g.ax_marg_x, color="C1")
    sns.histplot(y=y_ax, alpha=0.5, ax=g.ax_marg_y, color="C2", bins=20)
    sns.histplot(y=y_dend, alpha=0.5, ax=g.ax_marg_y, color="C1", bins=5)

    g.ax_joint.set_xlabel(length_label)
    g.ax_joint.set_ylabel(normed_label)
    plt.tight_layout()
    sns.despine(trim=True, ax=g.ax_joint)

    figure = g.ax_joint.figure
    figure.savefig(
        "results/for_article/fig_coll_agg/coll_vs_dist_single_neuron.pdf",
        transparent=True,
        dpi=300,
    )
    plt.show(block=False)

    return g
    def solve_model(self):
        """
        Runs the entire model.

        Steps, in order:

        a. Solve the household problem with ``solve_hh`` and store the savings
           and consumption policies in ``self.pol_sav`` / ``self.pol_cons``.
        b. Compute the stationary distribution according to
           ``self.distribution_method`` ('discrete', 'eigenvector' or
           'monte carlo'); a simulation is also run whenever ``self.simulate``
           is truthy.
        c. Optionally (``self.full_euler_error``) evaluate the Euler equation
           error with ``self.ee_error``.
        d. Optionally (``self.plott``) plot policies, densities and the
           simulation.
        e. Print the steady state assets and the Euler error summary.

        Timing information is printed after each step.

        Raises
        ------
        Exception
            If the policy function iteration, or the discrete density
            iteration, reaches ``self.maxit - 1`` iterations or more.
        """

        t0 = time.time()    #start the clock

        # The simulation is needed either on request or as the distribution
        # method. The same truthiness test is used everywhere below so that the
        # simulation outputs exist whenever they are plotted or printed (the
        # gate used to be "self.simulate == 1", which disagreed with the later
        # checks for truthy values other than 1).
        run_simulation = self.simulate or self.distribution_method == 'monte carlo'

        # a. solve household problem
        print("\nSolving household problem...")

        self.pol_sav, self.pol_cons, self.it_hh = solve_hh(self.params_pfi)

        if self.it_hh < self.maxit-1:
            print(f"Policy function convergence in {self.it_hh} iterations.")
        else:
            raise Exception("No policy function convergence.")

        t1 = time.time()
        print(f'Household problem time elapsed: {t1-t0:.2f} seconds')

        # b. stationary distribution

        # discrete approximation
        if self.distribution_method == 'discrete':

            print("\nStationary Distribution Solution Method: Discrete Approximation and Forward Iteration on Density Function")
            print("\nComputing...")

            # i. approximate stationary density
            self.stationary_pdf, self.it_pdf = discrete_stationary_density(self.pol_sav, self.params_discrete)

            if self.it_pdf < self.maxit-1:
                print(f"Convergence in {self.it_pdf} iterations.")
            else:
                raise Exception("No density function convergence.")

            # ii. steady state assets
            self.a_ss = np.sum(np.dot(self.stationary_pdf, self.grid_a_fine))

            # iii. marginal wealth density
            self.stationary_wealth_pdf = np.sum(self.stationary_pdf, axis=0)

            t2 = time.time()
            print(f'Density approximation time elapsed: {t2-t1:.2f} seconds')

        # eigenvector
        if self.distribution_method == 'eigenvector':

            print("\nStationary Distribution Solution Method: Eigenvector Method for Exact Stationary Density")
            print("\nComputing...")

            self.stationary_pdf, self.Q = self.eigen_stationary_density()

            # i. aggregate asset holdings
            self.a_ss = np.sum(np.dot(self.stationary_pdf, self.grid_a_fine))

            # ii. marginal wealth density
            self.stationary_wealth_pdf = np.sum(self.stationary_pdf, axis=0)

            t2 = time.time()
            print(f'Density computation time elapsed: {t2-t1:.2f} seconds')

        # monte carlo simulation
        if run_simulation:

            if self.distribution_method == 'monte carlo':
                print("\nStationary Distribution Solution Method: Monte Carlo Simulation")

            print("\nSimulating...")

            # i. simulate markov chain and endog. variables
            self.sim_c, self.sim_sav, self.sim_z, self.sim_m, self.euler_error_sim = simulate_MarkovChain(
                self.pol_cons,
                self.pol_sav,
                self.params_sim
            )

            # ii. steady state assets
            if self.distribution_method == 'monte carlo':
                self.a_ss = np.mean(self.sim_sav[self.sim_burnin :])

            # iii. max and average euler error, ignores nan which is when the euler equation does not bind
            self.max_error_sim = np.nanmax(self.euler_error_sim)
            self.avg_error_sim = np.nanmean(np.nanmean(self.euler_error_sim, axis=1))

            t2 = time.time()
            print(f'Simulation time elapsed: {t2-t1:.2f} seconds')

        else:
            # No simulation: (re)start the clock for the next step here.
            t2 = time.time()

        # c. calculate euler equation error across the state space

        if self.full_euler_error:
            print("\nCalculating Euler Equation Error...")

            self.euler_error, self.max_error, self.avg_error = self.ee_error()

            t3 = time.time()
            print(f'Euler Eq. error calculation time elapsed: {t3-t2:.2f} seconds')

        else:
            t3 = time.time()

        # d. plot

        if self.plott:

            print('\nPlotting...')

            # The legends below name exactly two income states (grid_z[0], grid_z[1]).
            z_labels = ['z='+str(self.grid_z[0]), 'z='+str(self.grid_z[1])]

            ##### Solutions #####

            plt.plot(self.grid_a, self.pol_sav.T)
            plt.title("Savings Policy Function")
            plt.plot([self.a_bar,self.a_max], [self.a_bar,self.a_max],linestyle=':')
            plt.legend(z_labels + ['45 degree line'])
            plt.xlabel('Assets')
            #plt.savefig('savings_policyfunction_pfi_v1.pdf')
            plt.show()

            plt.plot(self.grid_a, self.pol_cons.T)
            plt.title("Consumption Policy Function")
            plt.legend(z_labels)
            plt.xlabel('Assets')
            #plt.savefig('consumption_policyfunction_pfi_v1.pdf')
            plt.show()

            if self.full_euler_error:
                plt.plot(self.grid_a_fine, self.euler_error.T)
                plt.title('Log10 Euler Equation Error')
                plt.xlabel('Assets')
                #plt.savefig('log10_euler_error_pfi_v1.pdf')
                plt.show()

            ##### Distributions ####
            if self.distribution_method == 'discrete' or self.distribution_method == 'eigenvector':

                if self.distribution_method == 'discrete':
                    method_label = "Discrete Approx."
                else:
                    method_label = "Eigenvector Method"

                # joint stationary density
                plt.plot(self.grid_a_fine, self.stationary_pdf.T)
                plt.title(f"Joint Stationary Density ({method_label})")
                plt.xlabel('Assets')
                plt.legend(z_labels)
                #plt.savefig('joint_density_pfi_v1_discrete.pdf') if self.distribution_method == 'discrete' else plt.savefig('joint_density_pfi_v1_eigenvector.pdf')
                plt.show()

                # marginal wealth density
                plt.plot(self.grid_a_fine, self.stationary_wealth_pdf)
                plt.title(f"Stationary Wealth Density ({method_label})")
                plt.xlabel('Assets')
                #plt.savefig('wealth_density_pfi_v1_discrete.pdf') if self.distribution_method == 'discrete' else plt.savefig('wealth_density_pfi_v1_eigenvector.pdf')
                plt.show()

            if self.distribution_method == 'monte carlo':
                # cross-section of savings in the last simulated period
                sns.histplot(self.sim_sav[-1,:], bins=100, stat='density')
                plt.title("Stationary Wealth Density (Monte Carlo Approx.)")
                plt.xlabel('Assets')
                # plt.savefig('wealth_density_pfi_v1_montecarlo.pdf')
                plt.show()

            ##### Simulation #####
            if run_simulation:
                fig, (ax1, ax2) = plt.subplots(2,1,figsize=(10,6))
                fig.tight_layout(pad=4)

                # single household path at the start of the simulation
                # NOTE(review): the title says "First Household During First
                # 100 Periods", but the slice covers 99 rows and column index 1
                # - confirm whether [:100, 0] was intended.
                periods = np.arange(0,99,1)
                ax1.plot(periods, self.sim_sav[:99,1], periods, self.sim_c[:99,1],
                         periods, self.sim_z[:99,1],'--')
                ax1.legend(['Savings', 'Consumption', 'Income'])
                ax1.set_title('Simulation of First Household During First 100 Periods')

                #averages over entire simulation
                all_periods = np.arange(0,self.simT,1)
                ax2.plot(all_periods, np.mean(self.sim_sav, axis=1),
                         all_periods, np.mean(self.sim_c, axis=1) )
                # only savings and consumption are drawn on this axis, so the
                # legend carries exactly those two labels
                ax2.legend(['Savings', 'Consumption'])
                ax2.set_title('Simulation Average over 50,000 Households')
                #plt.savefig('simulation_pfi_v1.pdf')
                plt.show()

        t4 = time.time()
        print(f'Plot time elapsed: {t4-t3:.2f} seconds')

        # e. print solution

        if self.distribution_method != 'none':
            print("\n-----------------------------------------")
            print("Stationary Equilibrium Solution")
            print("-----------------------------------------")
            print(f"Steady State Assets = {self.a_ss:.2f}")

        if run_simulation or self.full_euler_error:
            print("\n-----------------------------------------")
            print("Log10 Euler Equation Error Evaluation")
            print("-----------------------------------------")

            if self.full_euler_error:
                print(f"\nFull Grid Evalulation: Max Error  = {self.max_error:.2f}")
                print(f"Full Grid Evalulation: Average Error = {self.avg_error:.2f}")

            if run_simulation:
                print(f"\nSimulation: Max Error  = {self.max_error_sim:.2f}")
                print(f"Simulation: Average Error = {self.avg_error_sim:.2f}")

        t5 = time.time()
        print(f'\nTotal Run Time: {t5-t0:.2f} seconds')
Esempio n. 7
0
import Conexao as con
import pandas as pd
import seaborn as sns

# Fetch the COVID impact dataframe through the project's connection helper
# and close the connection as soon as the data has been loaded.
conexao = con.Conexao()
df = conexao.getCovidImpactDataFrame()
conexao.fecharConexao()

# Distribution of the PercentOfBaseline column.
hist_axes = sns.histplot(df['PercentOfBaseline'])
hist_axes.set_title('Distribuição de PercentOfBaseline')

# PercentOfBaseline per country.
country_axes = sns.scatterplot(x=df["Country"],
                               y=df["PercentOfBaseline"],
                               data=df)
country_axes.set_title("Relação entre países e PercentOfBaseline")
sns.set_style("dark")

# PercentOfBaseline per city, with rotated tick labels.
g = sns.scatterplot(x=df["City"], y=df["PercentOfBaseline"], data=df)
g.set_xticklabels(g.get_xticklabels(), rotation=20)
g.set_title("Relação entre cidades e PercentOfBaseline")

# Number of rows per date, stored in a single "QtdVoos" column.
aux_df = df["Date"].value_counts().to_frame()
aux_df.columns = ["QtdVoos"]
sns.scatterplot(x=aux_df.index, y=aux_df["QtdVoos"], data=aux_df)

# PercentOfBaseline over time.
sns.scatterplot(x=df["Date"], y=df["PercentOfBaseline"], data=df)
Esempio n. 8
0
def main_fig(network_type_list, space_list, N_list, d_list, seed_list,
             weight_list, dynamics):
    """TODO: Docstring for main_fig.

    :network_type: TODO
    :N: TODO
    :d: TODO
    :seed: TODO
    :dynamics: TODO
    :returns: TODO

    """
    colors = ['Reds', 'Blues', 'Greens']
    letters = list('abcdefghijklmnopqrstuvwxyz')
    fig = plt.figure(figsize=(13, 20))
    gs = mpl.gridspec.GridSpec(
        nrows=8,
        ncols=9,
        height_ratios=[0.9, 1, 1, 1, 1.15, 1, 1, 1],
        width_ratios=[1.05, 0.25, 0.75, 0.10, 0.75, 0.25, 0.75, 0.85, 1])
    #plt.rcParams.update({"text.usetex": True,})
    ax = fig.add_subplot(gs[0, :])
    ax.set_axis_off()
    dxdt = r'$\frac{dx_i}{dt} = F(x_i) + w \sum_{j=1}^{N} A_{ij} G(x_i, x_j)$'
    t = ax.text(0.3,
                0.7,
                dxdt,
                ha="center",
                va="center",
                rotation=0,
                size=15,
                bbox=dict(boxstyle="round,pad=0.3",
                          fc="tab:grey",
                          ec="k",
                          lw=1,
                          alpha=0.5))

    dxdt = r'$\frac{dx}{dt} = F(x) + w \beta G(x, x)$'
    t = ax.text(0.91,
                0.7,
                dxdt,
                ha="center",
                va="center",
                rotation=0,
                size=15,
                bbox=dict(boxstyle="round,pad=0.3",
                          fc="tab:grey",
                          ec="k",
                          lw=1,
                          alpha=0.5))
    ax.annotate('One-dimensional Reduction',
                xy=(1.2, 0.8),
                xytext=(0.55, 0.7),
                xycoords='axes fraction',
                fontsize=14,
                color='tab:grey',
                weight='bold')

    #plt.rcParams.update({"text.usetex": False,})
    for (i,
         network_type), N, d, seed, space in zip(enumerate(network_type_list),
                                                 N_list, d_list, seed_list,
                                                 space_list):
        if network_type == 'ER':
            color_bias = 0
            node_size_bias = 1
        else:
            color_bias = 0.1
            node_size_bias = 5

        A_unit, A_interaction, index_i, index_j, cum_index = network_generate(
            network_type, N, 1, 0, seed, d)
        G = nx.from_numpy_array(A_unit)
        feature = feature_from_network_topology(A_unit,
                                                G,
                                                space,
                                                tradeoff_para=0.5,
                                                method='degree')
        if i == 2:
            xlabel = '$w$'
            xk_label = '$k$'
        else:
            xlabel = ''
            xk_label = ''
        if i == 1:
            ylabel = '$y^{(\\mathrm{gl})}_s$'
            ylabel_xs = '$x_s$'
            yk_label = '$P(k)$'
        else:
            ylabel_xs = ''
            ylabel = ''
            yk_label = ''

        ax = fig.add_subplot(gs[i + 1, 0:2])
        ax.annotate('$A_{ij}$',
                    xy=(-0.7, 0.5),
                    xytext=(0.9, 0.9),
                    xycoords='axes fraction',
                    fontsize=15,
                    color='k',
                    alpha=.8,
                    weight='bold')
        title_letter = f'({letters[i+0]}1)'
        title_letter = f'({letters[0]}{i+1})'
        plot_network_topology(ax, network_type, N, A_unit, colors[i],
                              color_bias, node_size_bias, title_letter)
        ax.annotate(network_type,
                    xy=(-0.7, 0.5),
                    xytext=(-0.85 - 0.08 * len(network_type), 0.45),
                    xycoords='axes fraction',
                    fontsize=15,
                    color=sns.color_palette(colors[i])[-1],
                    alpha=.5,
                    weight='bold')
        if i == 0:
            ax.annotate('Topology',
                        xy=(-0.7, 0.5),
                        xytext=(0.25, 1.4),
                        xycoords='axes fraction',
                        fontsize=15,
                        color='tab:grey',
                        alpha=.5,
                        weight='bold')

        xs_multi = read_xs(network_type, N, space, d, seed, N, weight_list)
        ax = fig.add_subplot(gs[i + 1, 2])
        title_letter = f'({letters[i+0]}2)'
        title_letter = f'({letters[1]}{i+1})'
        ax.annotate(title_letter,
                    xy=(-0.2, 1.03),
                    xycoords="axes fraction",
                    size=labelsize * 0.8)
        simpleaxis(ax)
        k = np.sum(A_unit > 0, 0)
        if network_type == 'SF':
            sns.histplot(k,
                         bins=20,
                         stat='density',
                         ax=ax,
                         color=sns.color_palette(colors[i])[-1],
                         alpha=0.5)
            ax.set_yscale('log')
        else:
            sns.histplot(k,
                         bins=20,
                         stat='density',
                         ax=ax,
                         color=sns.color_palette(colors[i])[-1],
                         alpha=0.5)
        ax.set_xlabel(xk_label, fontsize=labelsize)
        ax.set_ylabel(yk_label, fontsize=labelsize)

        ax = fig.add_subplot(gs[i + 1, 4:6])
        title_letter = f'({letters[i+0]}3)'
        title_letter = f'({letters[2]}{i+1})'
        linewidth = 2
        alpha = 0.8
        plot_xs_weight(ax, xlabel, ylabel_xs, A_unit, len(A_unit), weight_list,
                       xs_multi, colors[i], linewidth, alpha, color_bias,
                       title_letter)
        if i == 0:
            ax.annotate('Dynamics',
                        xy=(-0.7, 0.5),
                        xytext=(0.25, 1.4),
                        xycoords='axes fraction',
                        fontsize=15,
                        color='tab:grey',
                        alpha=.5,
                        weight='bold')

        ax = fig.add_subplot(gs[i + 1, 6])
        ax.set_axis_off()
        ax.annotate(' ' * 10,
                    xy=(0.4, 0.50),
                    xytext=(0.75, 0.5),
                    xycoords='axes fraction',
                    ha='center',
                    va='bottom',
                    bbox=dict(boxstyle='rarrow, pad=0.6',
                              fc=sns.color_palette(colors[i])[-1],
                              ec='k',
                              lw=2,
                              alpha=0.5))

        ax = fig.add_subplot(gs[i + 1, 7])
        title_letter = f'({letters[i+0]}4)'
        title_letter = f'({letters[3]}{i+1})'
        m = 1
        xs_group = read_xs(network_type, N, space, d, seed, m, weight_list)
        group_index = group_index_from_feature_Kmeans(feature, m)
        A_reduced, _, _ = reducednet_effstate(A_unit, xs_multi[0], group_index)
        ax.annotate('$\\beta$',
                    xy=(-0.7, 0.5),
                    xytext=(1, 1),
                    xycoords='axes fraction',
                    fontsize=15,
                    color='k',
                    alpha=.8,
                    weight='bold')
        if i == 0:
            ax.annotate('Topology',
                        xy=(-0.7, 0.5),
                        xytext=(0.25, 1.4),
                        xycoords='axes fraction',
                        fontsize=15,
                        color='tab:grey',
                        alpha=.5,
                        weight='bold')
        plot_reduced_network_topology(ax, network_type, A_reduced, m,
                                      colors[i], color_bias, node_size_bias,
                                      title_letter)
        ax = fig.add_subplot(gs[i + 1, 8])
        title_letter = f'({letters[i+0]}5)'
        title_letter = f'({letters[4]}{i+1})'
        if i == 0:
            ax.annotate('Dynamics',
                        xy=(-0.7, 0.5),
                        xytext=(0.25, 1.4),
                        xycoords='axes fraction',
                        fontsize=15,
                        color='tab:grey',
                        alpha=.5,
                        weight='bold')
        plot_xs_weight(ax, xlabel, ylabel, A_reduced, m, weight_list, xs_group,
                       colors[i], linewidth, alpha, color_bias, title_letter,
                       xs_multi, A_unit, group_index)

        if i == 2:
            groundtruth, = ax.plot([], [],
                                   color='tab:grey',
                                   alpha=0.8,
                                   label='ground truth',
                                   linewidth=2)
            reduced1, = ax.plot([], [],
                                color=sns.color_palette(colors[0])[-1],
                                linewidth=3)
            reduced2, = ax.plot([], [],
                                color=sns.color_palette(colors[1])[-1],
                                linewidth=3)
            reduced3, = ax.plot([], [],
                                color=sns.color_palette(colors[2])[-1],
                                linewidth=3)
            ax.legend([groundtruth, (reduced1, reduced2, reduced3)],
                      ['ground truth', 'reduced'],
                      fontsize=legendsize * 0.7,
                      frameon=False,
                      loc=4,
                      bbox_to_anchor=(1.05, -1.8),
                      handler_map={tuple: HandlerTuple(ndivide=None)})

        for j, m in enumerate([3, 5, 10]):
            if i == 2:
                xlabel = '$w$'
            else:
                xlabel = ''
            if i == 1:
                ylabel = '$y^{(\\mathrm{gl})}_s$'
            else:
                ylabel = ''

            if j == 0:
                ax = fig.add_subplot(gs[4 + i + 1, 0])
            elif j == 1:
                ax = fig.add_subplot(gs[4 + i + 1, 3:5])
            elif j == 2:
                ax = fig.add_subplot(gs[4 + i + 1, 7])
            title_letter = f'({letters[i+0]}{6+j*2})'
            title_letter = f'({letters[5+j*2]}{i+1})'
            if i == 0:
                ax.annotate(f'$m={m}$',
                            xy=(0.7, 0.5),
                            xytext=(0.95, 1.25),
                            xycoords='axes fraction',
                            fontsize=15,
                            color='tab:grey',
                            weight='bold')
                ax.annotate('$\\beta_{ab}$',
                            xy=(-0.7, 0.5),
                            xytext=(0.85, 0.85),
                            xycoords='axes fraction',
                            fontsize=15,
                            color='k',
                            alpha=.8,
                            weight='bold')
            xs_group = read_xs(network_type, N, space, d, seed, m, weight_list)
            group_index = group_index_from_feature_Kmeans(feature, m)
            A_reduced, _, _ = reducednet_effstate(
                A_unit, np.random.random(len(A_unit)), group_index)
            plot_reduced_network_topology(ax, network_type, A_reduced, m,
                                          colors[i], color_bias,
                                          node_size_bias, title_letter)

            if j == 0:
                ax = fig.add_subplot(gs[4 + i + 1, 1:3])
            elif j == 1:
                ax = fig.add_subplot(gs[4 + i + 1, 5:7])
            elif j == 2:
                ax = fig.add_subplot(gs[4 + i + 1, 8])
            title_letter = f'({letters[i+0]}{7+j*2})'
            title_letter = f'({letters[6+j*2]}{i+1})'
            #plot_xs_weight(ax, xlabel, ylabel, A_reduced, m, weight_list, xs_group, colors[i], linewidth, alpha, color_bias, title_letter, xs_multi, A_unit, group_index)
            plot_ygl_weight(ax, xlabel, ylabel, A_reduced, m, weight_list,
                            xs_group, colors[i], linewidth, alpha, color_bias,
                            title_letter, xs_multi, A_unit, group_index)

    ax = fig.add_subplot(gs[4, :])
    ax.set_axis_off()
    ax.annotate('m-dimensional Reduction',
                xy=(0.7, 0.5),
                xytext=(0.22, 0.5),
                xycoords='axes fraction',
                fontsize=14,
                color='tab:grey',
                weight='bold')
    dxdt = r'$\frac{dy^{(a)}}{dt} = F(x_i) + w \sum_{b=1}^{m} \beta_{ab} G(y^{(a)}, y^{(b)})$'
    t = ax.text(0.63,
                0.58,
                dxdt,
                ha="center",
                va="center",
                rotation=0,
                size=15,
                bbox=dict(boxstyle="round,pad=0.3",
                          fc="tab:grey",
                          ec="k",
                          lw=1,
                          alpha=0.5))
    draw_brace(ax, (0.12 * ax.get_xlim()[1], 0.8 * ax.get_xlim()[1]),
               'tab:grey', 3, '')

    plt.subplots_adjust(left=0.05,
                        right=0.95,
                        wspace=0.25,
                        hspace=0.70,
                        bottom=0.05,
                        top=0.95)
fig = plt.figure(figsize=(15, 8))
nx, ny = 2, 3

for i in range(5):
    ax = fig.add_subplot(nx, ny, i + 1)
    ax.set_title(allnames[i])

    bins = 10
    binrange_min = min(alldata_LA[i].min(), alldata_ORI[i].min())
    binrange_max = max(alldata_LA[i].max(), alldata_ORI[i].max())
    sns.histplot(alldata_LA[i],
                 ax=ax,
                 bins=bins,
                 binrange=(binrange_min, binrange_max),
                 common_bins=True,
                 kde=False,
                 label="Look ahead",
                 color="orange",
                 alpha=0.3)
    sns.histplot(alldata_ORI[i],
                 ax=ax,
                 bins=bins,
                 binrange=(binrange_min, binrange_max),
                 common_bins=True,
                 kde=False,
                 label="Original",
                 color="blue",
                 alpha=0.2)
    ax.axvline(x=alldata_LA[i].mean(),
               label="means",
Esempio n. 10
0
# Horizontal bar chart of how many rows fall in each city. value_counts() is
# evaluated once so bar lengths and labels come from the same Series.
city_counts = df['City'].value_counts()
sns.barplot(x=city_counts.values, y=city_counts.index)
plt.title('Population per city')
plt.xlabel('Counts')
plt.ylabel('Cities')

# Row counts per gender, split by the Illness column.
plt.figure(figsize=(10, 5))
sns.countplot(x="Gender", hue="Illness", palette="rocket", data=df)

# One age histogram per distinct Illness value.
g = sns.FacetGrid(df, col='Illness', height=5)
g = g.map(sns.histplot, "Age")

# Row counts per city, split by gender.
plt.figure(figsize=(10, 5))
sns.countplot(x="City", hue="Gender", palette="rocket", data=df)

plt.figure(figsize=(10, 5))
sns.histplot(df["Age"], color='r')
plt.title("Age distribution")

# sns.distplot is deprecated; a density-normalised histplot with a KDE overlay
# is the replacement suggested by the seaborn documentation.
plt.figure(figsize=(10, 5))
sns.histplot(df["Income"],
             color='g',
             kde=True,
             stat="density",
             kde_kws=dict(cut=3))
plt.title("Income distribution")

# Overlay the income histograms of both genders on one figure.
fig = plt.figure(figsize=(10, 5))
sns.histplot(df[df["Gender"] == "Male"]["Income"], color='b')
sns.histplot(df[df["Gender"] == "Female"]["Income"], color='r')
fig.legend(labels=['Male', 'Female'])
plt.title("Income distribution - Man and Woman")

cities = ['Dallas', 'New York City', 'Los Angeles', 'Mountain View', 'Boston', 'Washington D.C.', 'Austin', 'San Diego']
colors = ['orange', 'red', 'blue', 'teal', 'brown', 'turquoise', 'olive', 'plum']
fig = plt.figure(figsize=(10, 5))
def plot_distribution(
    dist,
    dist_str: str,
    agg_df: pd.DataFrame,
    circle_group: str,
    area_group: str,
    param: str,
    reference_value_dict: Dict[str, float],
    ax,
    legend: bool,
    area_bin_col: str,
    circle_count_col: str,
):
    """
    Plot a fitted distribution of ``param`` for one circle-count/area group.

    Rows of ``agg_df`` whose ``circle_count_col`` equals ``circle_group`` and
    whose ``area_bin_col`` equals ``area_group`` are selected. ``dist`` is
    fitted to their ``param`` values and the resulting PDF, central
    probability intervals, a reference value, a density histogram and a text
    box of fit statistics are drawn on ``ax``.

    Parameters
    ----------
    dist
        Distribution object providing ``fit`` (returning ``a, b, loc, scale``)
        and callable as ``dist(a, b, loc=..., scale=...)``. Presumably
        ``scipy.stats.beta`` -- confirm against callers.
    dist_str
        Name of the same distribution as understood by ``stats.kstest``.
    agg_df
        Frame holding ``param``, ``circle_count_col`` and ``area_bin_col``.
    circle_group, area_group
        Group labels to select. ``circle_group`` has ``"-"`` replaced by
        ``" to "`` in the title; characters 0 and 2 of ``area_group`` are
        parsed as integers for the title.
    param
        Column to analyse; also the key into ``reference_value_dict``.
    reference_value_dict
        Reference value per parameter, drawn as a dashed vertical line.
    ax
        Matplotlib axes to draw on.
    legend
        Whether to show the legend.
    area_bin_col, circle_count_col
        Names of the grouping columns in ``agg_df``.

    Raises
    ------
    ValueError
        If no rows match the requested group pair.
    """
    # Select the rows for this group pair with one combined mask. Both
    # comparisons are built on agg_df, so they share its index.
    in_group = (agg_df[circle_count_col] == circle_group) & (
        agg_df[area_bin_col] == area_group
    )

    # Get parameter values
    values = agg_df.loc[in_group, param].values
    if len(values) == 0:
        raise ValueError(
            f"No rows with {circle_count_col}={circle_group!r} and "
            f"{area_bin_col}={area_group!r}."
        )

    # Value interval; reused for text offsets and axis limits below
    value_min, value_max = min(values), max(values)
    delta = abs(value_max - value_min)

    # Fit the distribution and freeze it with the fitted parameters
    a, b, loc, scale = dist.fit(values)
    beta_dist = dist(a, b, loc=loc, scale=scale)

    # Evaluate the fitted PDF over the observed value range
    x = np.linspace(value_min, value_max)
    y = beta_dist.pdf(x)

    # Plot x, y values
    sns.lineplot(
        ax=ax,
        x=x,
        y=y,
        color="black",
        label="Beta Distribution PDF Fit" if legend else None,
        legend=legend,
    )

    # Colour source for the interval lines; colorgen is defined elsewhere in
    # the project and is consumed with next().
    color_generator = colorgen(sns.color_palette("Reds", n_colors=3))

    # Central probability masses to mark, widest interval first
    for prob in (0.75, 0.5, 0.25):
        interval = beta_dist.interval(prob)
        prob_color = next(color_generator)
        prob_text = f"{int(prob * 100)} % of iterations."

        # Iterate over the two interval edges; xoff pushes the text label
        # left of the lower edge and right of the upper edge.
        for xloc, xoff in zip(interval, (-1, 1)):
            edge_pdf = beta_dist.pdf(xloc)

            # Plot vertical line at interval edge. Only the lower edge gets a
            # label so each probability appears once in the legend.
            ax.vlines(
                xloc,
                ymin=0,
                ymax=edge_pdf,
                color=prob_color,
                label=prob_text if xloc != interval[-1] else None,
            )

            # Plot the interval edge value as text
            ax.text(
                x=xloc + (delta * 0.02 * xoff),
                y=edge_pdf / 2.25,
                s=f"${round(xloc, 2)}$",
                rotation=90,
                ha="center",
                fontstyle="italic",
                va="center",
                fontsize=8,
            )

    # Plot and annotate the reference value
    reference_value = reference_value_dict[param]
    ax.axvline(reference_value, linestyle="dashed", color="black")
    ax.annotate(
        text="Reference value",
        xy=(reference_value, max(y) * 1.02),
        xytext=(reference_value - 0.4 * delta, max(y) * 1.03),
        arrowprops={"arrowstyle": "->"},
    )

    # Remove top and right spines
    sns.despine(top=True, right=True)

    ax.set_ylabel("Probability Density Function (PDF)")

    # Plot the background histplot of true values
    sns.histplot(
        ax=ax,
        x=values,
        stat="density",
        alpha=0.1,
        edgecolor=None,
        color="black",
        label=f"{utils.param_renamer(param)} Histogram",
    )

    # Set legend for plot
    if legend:
        ax.legend(edgecolor="black", loc="upper right")
    else:
        ax.legend().remove()

    def dist_param_str(value: float, name: str):
        """
        Make string repr from param value.
        """
        return f"${name} = {round(value, 3)}$"

    # Kolmogorov-Smirnov test of the values against the fitted distribution
    kstest_result = stats.kstest(values, dist_str, args=(a, b, loc, scale))
    statistic = kstest_result[0]
    pvalue = kstest_result[1]

    # Collect some distribution parameters into a multi-line string. Keeping
    # each name next to its value makes a length mismatch impossible.
    summary = (
        (r"\alpha", a),
        (r"\beta", b),
        ("median", beta_dist.median()),
        ("std", beta_dist.std()),
        ("var", beta_dist.var()),
        (r"KS\ statistic", statistic),
        (r"KS\ pvalue", pvalue),
    )
    param_text = "Beta Distribution\n" + "\n".join(
        dist_param_str(value, name) for name, value in summary
    )

    # Plot the collected text
    ax.text(
        0.1,
        0.25,
        s=param_text,
        ha="center",
        ma="right",
        fontsize=8,
        transform=ax.transAxes,
    )

    # Figure title
    # NOTE(review): area_group[0] / area_group[2] only work for labels whose
    # bounds are single characters (e.g. "1-2") -- confirm the label format.
    circle_group_text = circle_group.replace("-", " to ")
    ax.set_title(
        f"Subsampling iterations with circle count from {circle_group_text}"
        " and total area between "
        f"{int(area_group[0])*1000}-{int(area_group[2])*1000} $m^2$."
    )

    # Set x scale
    ax.set_xlim(value_min - 0.25 * delta, 0.9 * value_max + 0.5 * delta)

    # Set y scale
    ax.set_ylim(0, max(y) * 1.2)

    # Set param name nicely
    ax.set_xlabel(utils.param_renamer(param))
def plot_distribution(df, col_name, vlabel, name_file, close_file=True):
    """
    Save a three-panel summary figure (boxplot, histogram, time series).

    The non-null values of ``df[col_name]``, sorted by index, are drawn as a
    boxplot with the individual points, a probability histogram, and a line
    plot against the index. The figure is written to
    ``"Output/Plot/" + name_file``.

    Parameters
    ----------
    df
        Frame containing ``col_name``.
    col_name
        Column to summarise; its upper-cased name is the figure title.
    vlabel
        Axis label used for the value axis of all three panels.
    name_file
        File name appended to ``"Output/Plot/"`` for ``fig.savefig``.
    close_file
        When truthy, close the figure after saving.
    """
    s = df[col_name].dropna().sort_index()
    n_obs = s.count()

    # Value-axis limits; round_down / round_up are helpers defined elsewhere.
    vmin = round_down(s.min(), -1)
    if s.max() < 10:
        vmax = round_up(s.max(), -1)
    else:
        # Leave 10 % headroom above the maximum for larger values.
        vmax = round_up(s.max() * 1.1, -1)
    # Padding of one tenth of the range, used around the box and histogram.
    vstep = (vmax - vmin) * 10**-1

    fig = plt.figure(constrained_layout=True,
                     figsize=(8, 6),
                     facecolor="lightgray")
    fig.suptitle(col_name.upper(), fontweight='bold')

    # Top row: boxplot (narrow) and histogram (wide); bottom row: time series.
    gs = GridSpec(2,
                  2,
                  figure=fig,
                  left=0.1,
                  right=0.85,
                  top=0.950,
                  bottom=0.1,
                  hspace=0.0125,
                  height_ratios=[2, 1],
                  wspace=0.005,
                  width_ratios=[1, 2])
    ax_histogram = fig.add_subplot(gs[0, 1])
    ax_boxplot = fig.add_subplot(gs[0, 0])
    ax_timeseries = fig.add_subplot(gs[1, :])

    # Boxplot with whiskers spanning the full data range (0th-100th
    # percentile), overlaid with the individual observations.
    sns.boxplot(data=s,
                whis=[0, 100],
                orient="v",
                color='lightblue',
                linewidth=1,
                saturation=1,
                zorder=3,
                ax=ax_boxplot)
    sns.stripplot(data=s,
                  size=2.5,
                  orient="v",
                  color=".3",
                  linewidth=0,
                  ax=ax_boxplot)
    ax_boxplot.set_ylim(vmin - vstep, vmax + vstep)
    ax_boxplot.set_xticklabels("")
    ax_boxplot.set_ylabel(vlabel)
    ax_boxplot.grid(axis="y", ls="--", lw=0.75, zorder=2)
    ax_boxplot.set_axisbelow(True)
    ax_boxplot.set_title("Boxplot")
    ax_boxplot.xaxis.set_ticks_position('none')

    # Histogram: bin count is 1 + 3.322 * log10(n) for n >= 100, otherwise
    # sqrt(n), both rounded to the nearest integer.
    if n_obs >= 100:
        kbins = np.round(1 + 3.322 * np.log10(n_obs)).astype(int)
    else:
        kbins = np.round(np.sqrt(n_obs)).astype(int)
    sns.histplot(s,
                 stat="probability",
                 color='lightblue',
                 bins=kbins,
                 binrange=(vmin, vmax),
                 zorder=3,
                 ax=ax_histogram)
    ax_histogram.set_xlim(vmin - vstep, vmax + vstep)
    ax_histogram.set_ylim(0, 1)
    ax_histogram.grid(ls="--", lw=0.75, zorder=1)
    ax_histogram.set_ylabel("Probabilidad (%)")
    ax_histogram.set_xlabel(vlabel)
    ax_histogram.set_axisbelow(True)
    # Pin the current tick positions, then relabel them as percentages
    # (probability * 100) to match the "(%)" axis label.
    ax_histogram.yaxis.set_major_locator(
        mticker.FixedLocator(ax_histogram.get_yticks()))
    ax_histogram.set_yticklabels(
        mticker.FormatStrFormatter('%.0f').format_ticks(
            ax_histogram.get_yticks() * 100))
    ax_histogram.set_title("Histograma")

    # Time series: values plotted against the series index.
    ax_timeseries.plot(s, label="Datos", c='k', lw=1)
    ax_timeseries.set_xlim(s.index.min(), s.index.max())
    ax_timeseries.set_ylim(vmin, vmax)
    ax_timeseries.grid(axis="both", ls="--", lw=0.75, zorder=2)
    ax_timeseries.set_ylabel(vlabel)
    ax_timeseries.set_xlabel("Tiempo [años]")
    ax_timeseries.set_title("Serie de Tiempo")
    ax_timeseries.legend(loc=0, ncol=2)

    fig.savefig("Output/Plot/" + name_file)
    if close_file:
        plt.close(fig)
Esempio n. 13
0
# For every mouse, find the session with the highest 'correct' count and
# record that session's incorrect count, correct count and day.
for mouse in mouse_ids:
    mouse_results = results[results['mouse_id'] == mouse]
    max_reward = mouse_results['correct'].max()
    best_day = mouse_results[mouse_results['correct'] == max_reward]
    # If several sessions tie on 'correct', the first matching row is used.
    incorrects.append(best_day['incorrect'].values[0])
    max_rewards.append(max_reward)
    best_days.append(best_day['day'].values[0])

# Get trial data from best days
trials = pd.DataFrame(cohort.get_trials())
# Latency is the trial's end minus its start.
# NOTE(review): units depend on what cohort.get_trials() returns -- confirm.
trials["grasp_latency"] = trials.end - trials.start
latencies = []

# Collect, per mouse, the latencies of correct trials on that mouse's best day.
for i, mouse in enumerate(mouse_ids):
    mouse_trials = trials[trials['mouse_id'] == mouse]
    day_trials = mouse_trials[mouse_trials['day'] == best_days[i]]
    corrects = day_trials[day_trials['outcome'] == Outcomes.CORRECT]
    latencies.append(corrects["grasp_latency"])

# One column per mouse; rows are aligned on the original trial index.
all_latencies = pd.concat(latencies, axis=1, keys=mouse_ids)

# Four views of the same per-mouse latency distributions on a 2x2 grid.
_, axes = plt.subplots(2, 2)
sns.histplot(data=all_latencies, ax=axes[0][0])
sns.boxplot(data=all_latencies, ax=axes[0][1])
sns.violinplot(data=all_latencies, ax=axes[1][0])
sns.stripplot(data=all_latencies, ax=axes[1][1])
utils.save(
    "~/duguidlab/visuomotor_control/figures/srf_grant/reach_behaviour_grasp_latency.pdf"
)
#%%
# Plot setup: default seaborn theme and a figure size of 11.7 x 8.27 inches.
import seaborn as sns
sns.set_theme()
sns.set(rc={'figure.figsize': (11.7, 8.27)})
import pandas as pd

#%%
# Load the review data and print the column/dtype summary.
df = pd.read_csv('gamespot_reviews.csv')
df.info()

#%%
# Distribution of review scores in 20 bins, plus its mean and median.
sns.histplot(df, x='score', bins=20)
import numpy as np
score_mean = df['score'].mean()
score_median = df['score'].median()
print(score_mean)
print(score_median)

#%%
# pd_df = df.sort_values(['score']).reset_index(drop=True)
# print (pd_df)
#%%
# Number of reviews per genre, most frequent genre first.
sns.countplot(y='genre', data=df, order=df['genre'].value_counts().index)

#%%
# Score distribution per genre, in the same genre order as the count plot.
sns.boxplot(data=df,
            x='score',
            y='genre',
            order=df['genre'].value_counts().index)
#%%
# Train result visualization: predicted vs. actual values on the training set
plt.scatter(y_train, y_train_pred)
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.title('Train Result Scatter')
plt.grid()
plt.show()

# Test result visualization: predicted vs. actual values on the test set
plt.scatter(y_test, y_test_pred)
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.title('Test Result Scatter')
plt.grid()
plt.show()

# Checking residuals (actual minus predicted) against the actual value
plt.scatter(y_test, y_test - y_test_pred, color='r')
plt.xlabel('Actual Price')
plt.ylabel('Residuals')
plt.title('Actual Price VS. Residual')
plt.grid()
plt.show()

# Checking residual normality with a histogram and KDE overlay
# NOTE(review): `sn` is presumably the seaborn alias imported elsewhere in
# this script -- confirm.
residual = sn.histplot(y_test - y_test_pred, kde=True)
residual.set_title('Residuals Histogram')
residual.set(xlabel='Residuals', ylabel='Frequency')
plt.show()
Esempio n. 16
0
2. Handling missing values

age,cabin.embarked

"""

        #Age

#number of missing values is 3292

# Histogram to detect any skewed distribution
# (training data on the top axes, test data on the bottom axes)

fig, ax = plt.subplots(2, figsize = (7,5))
fig.suptitle("Histogram of Age")
ax[0].set_title('Training Data')
sns.histplot(df_train['Age'], kde=True,bins=20, ax=ax[0])
ax[1].set_title('Test Data')
sns.histplot(df_test['Age'], kde=True,bins=20, ax=ax[1])
fig.tight_layout()
plt.show()

# Boxplot to detect any outlier
# The series is passed as x= explicitly: the meaning of the first positional
# argument of sns.boxplot differs between seaborn releases, and the keyword
# keeps the boxes horizontal in all of them.

fig, ax = plt.subplots(2, figsize = (8,8))
fig.suptitle("Boxplot of Age")
ax[0].set_title('Training Data')
sns.boxplot(x=df_train['Age'], ax=ax[0])
ax[1].set_title('Test Data')
sns.boxplot(x=df_test['Age'], ax=ax[1])
fig.tight_layout()
plt.show()
Esempio n. 17
0
def _render_figure(fig):
    """Show *fig* on the Streamlit page, then close it so reruns do not accumulate open figures."""
    st.pyplot(fig)
    plt.close(fig)


def main():
    """
    Render the charging-station page: histograms, box plot and a scatter plot.

    Reads ``laadpaaldataClean.csv``, parses the ``Started`` / ``Ended``
    columns as datetimes, and draws, in order:

    1. a histogram of a user-selected column (with a weekday range filter when
       ``Weekday`` is selected),
    2. a ``ChargeTime`` histogram with KDE plus mean/median markers, followed
       by a ``ChargeTime`` box plot,
    3. a 2D histogram of ``ChargeTime`` against ``ConnectedTime``,
    4. a scatter plot of a user-selected column over a user-selected
       time window.

    Every plot is drawn on its own explicit figure and handed to
    ``st.pyplot`` instead of relying on the global pyplot figure.
    """
    laadpaaldata = pd.read_csv("laadpaaldataClean.csv", index_col=0)

    for timestamp_col in ('Started', 'Ended'):
        laadpaaldata[timestamp_col] = pd.to_datetime(
            laadpaaldata[timestamp_col], format='%Y-%m-%d %H:%M:%S')

    # ------------------------------------------------------------------------------------------------------
    # 1. Histogram of a selectable column

    histogram_columns = [
        "TotalEnergy", "ConnectedTime", "ChargeTime", "MaxPower",
        "OverCharged", "Weekday"
    ]

    st.subheader("Wat is de verdeling in vermogens?")

    st.text("Hieronder kunt you de histogram hieronder aanpassen.")
    option2 = st.selectbox('Selecteer uw column voor de histogram?',
                           histogram_columns)

    if option2 == "Weekday":
        week = [
            'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
            'Sunday'
        ]
        # Map each day name to the number it is compared with in the
        # 'Weekday' column (Monday = 0 ... Sunday = 6).
        day_number = {day: number for number, day in enumerate(week)}

        week1, week2 = st.select_slider(
            'Selecteer een speciafieke weekdag (0=Maandag)',
            options=week,
            value=('Monday', 'Sunday'))

        first_day = day_number[week1]
        last_day = day_number[week2]

        # Keep only sessions whose weekday lies inside the selected range.
        df2 = laadpaaldata.loc[((laadpaaldata['Weekday'] >= first_day) &
                                (laadpaaldata['Weekday'] <= last_day))]

        # The second argument of st.subheader is the header anchor; passing
        # it by keyword makes that explicit.
        st.subheader("Histogram", anchor=option2)
    else:
        df2 = laadpaaldata
        st.subheader("Histogram")

    fig, ax = plt.subplots()
    sns.histplot(data=df2, x=option2, bins="auto", ax=ax)
    _render_figure(fig)

    # ------------------------------------------------------------------------------------------------------
    # 2. Charge-time histogram with KDE, mean/median markers and box plot

    st.subheader(
        "Een histogram van de laadtijd met de bijhorende boxplot.\n"
        "Dit in combinatie meteen annotatie van het gemiddelde en de median en een benadering van de kansdichtheidsfunctie."
    )

    # The markers are computed on the full data set, while the histogram uses
    # df2 (which is weekday-filtered when 'Weekday' was selected above).
    charge_mean = laadpaaldata.ChargeTime.mean()
    charge_median = laadpaaldata.ChargeTime.median()

    fig, ax = plt.subplots()
    sns.histplot(data=df2, x="ChargeTime", bins="auto", kde=True, ax=ax)
    ax.axvline(x=charge_mean, linewidth=1, color='r', label="mean", alpha=0.5)
    ax.axvline(x=charge_median,
               linewidth=1,
               color='g',
               label="median",
               alpha=0.5)
    # Build the legend from the artists' own labels. Passing a bare list of
    # labels would attach "mean" to the first artist on the axes (the KDE
    # curve) instead of the marker lines.
    ax.legend()
    _render_figure(fig)

    fig, ax = plt.subplots()
    sns.boxplot(data=df2, x="ChargeTime", ax=ax)
    _render_figure(fig)

    # ------------------------------------------------------------------------------------------------------
    # 3. 2D histogram of charge time against connected time

    fig, ax = plt.subplots()
    sns.histplot(data=laadpaaldata,
                 x="ChargeTime",
                 y="ConnectedTime",
                 bins=40,
                 cbar=True,
                 cbar_kws=dict(shrink=.75),
                 ax=ax)
    ax.axvline(x=charge_mean, linewidth=1, color='g', label="mean", alpha=0.5)
    ax.axvline(x=charge_median,
               linewidth=1,
               color='y',
               label="median",
               alpha=0.5)
    ax.legend()
    _render_figure(fig)

    # ------------------------------------------------------------------------------------------------------
    # 4. Scatter plot over a selectable time window

    st.subheader(
        "Een scatterplot over tijd.\n"
        "Gebruik de tijdslider en de dropdown menu om een column te selecteren"
    )
    scatter_columns = ["TotalEnergy", "ConnectedTime", "ChargeTime", "MaxPower"]
    col_one_list = laadpaaldata['Started'].tolist()

    # Default slider range: first to last row of the 'Started' column.
    start_date = laadpaaldata['Started'].iloc[0]
    end_date = laadpaaldata['Started'].iloc[-1]

    option3 = st.selectbox('Selecteer uw column voor de y-as', scatter_columns)
    st.write('You selected:', option3)

    start_slider, end_slider = st.select_slider(
        'Select a range for dates created',
        options=col_one_list,
        value=(start_date, end_date))

    st.write('Your selected time between', start_slider, 'and', end_slider)
    # Sessions that start after and end before the selected bounds (both
    # comparisons are strict).
    df = laadpaaldata.loc[((laadpaaldata['Started'] > start_slider) &
                           (laadpaaldata['Ended'] < end_slider))]

    fig, ax = plt.subplots()
    sns.scatterplot(data=df, x="Started", y=option3, ax=ax)
    ax.tick_params(axis="x", labelrotation=45)
    _render_figure(fig)
Esempio n. 18
0
    print(df2.tail())

    ax = sns.boxplot(x="param",
                     y="percent_change",
                     hue="Site",
                     data=df,
                     palette="Set1",
                     width=0.5)
    ax.set_xlabel("Parameter")
    ax.set_ylabel("Sensitivity of Storage Efficiency [$\%$]")
    plt.savefig("data/paper/sensitivities.jpg", bbox_inches="tight", dpi=300)
    plt.clf()

    ax = sns.histplot(df2,
                      x="frozen",
                      hue="Site",
                      palette="Set1",
                      element="step",
                      fill=False)
    ax.set_ylabel("Discharge duration [ $hours$ ]")
    ax.set_xlabel("Freezing rate [ $l\\, min^{-1}$ ]")
    plt.savefig("data/paper/freeze_rate.jpg", bbox_inches="tight", dpi=300)
    plt.clf()

    ax = sns.histplot(df3,
                      x="melted",
                      hue="Site",
                      palette="Set1",
                      element="step",
                      fill=False)
    ax.set_ylabel("Discharge duration [ $hours$ ]")
    ax.set_xlabel("Melting rate [ $l\\, min^{-1}$ ]")
Esempio n. 19
0
    'MGLU3.SA': 'MGLU',
    'TOTS3.SA': 'TOTS',
    'BOVA11.SA': 'BOVA'
}

# Replace the ticker symbols in the column names with the short names
# defined in rename_cols.
acoes_df.rename(columns=rename_cols, inplace=True)

# Check for null
# (outside a notebook the result of isnull().sum() is discarded)
acoes_df.isnull().sum()
acoes_df.dropna(inplace=True)

# Save to csv
acoes_df.to_csv('acoes.csv')

# Simple histogram of a single stock
sns.histplot(acoes_df['GOL'])

# Histograms of all stocks, stacked vertically in one tall figure
# NOTE(review): the loop starts at column 1 and draws into subplot i + 1, so
# column 0 is skipped and grid slot 1 stays empty; the grid is fixed at 7
# rows -- confirm both match the frame's layout.
plt.figure(figsize=(10, 50))
i = 1
for i in np.arange(1, len(acoes_df.columns)):
    plt.subplot(7, 1, i + 1)
    sns.histplot(acoes_df[acoes_df.columns[i]], bins=25, kde=True)
    plt.title(acoes_df.columns[i])

# Boxplot of a single stock
sns.boxplot(x=acoes_df['GOL'])

plt.figure(figsize=(10, 50))
i = 1
for i in np.arange(1, len(acoes_df.columns)):
Esempio n. 20
0
# Categorical variables:
# - the vast majority of listings are for properties that are not part of a gated estate
# - properties with a balcony make up 60 %
# - nearly 500 properties have access to a garden
# - none of the listings contained information about access to a garage / parking space - this may be the result of an error in the process of scraping the data from the website
# - more than 60 % of the properties are not located in a building equipped with a lift
# - information about an included basement appears in 50 % of the listings
# - nearly 25 % of the properties have no monitoring or security

# For every numeric feature draw a histogram and a box plot, each on its own
# 16x8 figure.
for feature in [
        "powierzchnia", "cena", "cena_metr", "czas_auto", "czas_zbiorowy",
        "dystans_auto", "dystans_zbiorowy"
]:
    fig = plt.figure(figsize=(16, 8))
    sns.histplot(data=df_with_localisation_cleaned, x=feature)
    plt.plot()
    fig = plt.figure(figsize=(16, 8))
    sns.boxplot(data=df_with_localisation_cleaned, y=feature)
    plt.plot()

# The box plots reveal observations that are outliers across the whole data set. These are properties with an area above 120 square metres and a price per metre of 20000. Looking at the box plot and the histogram of the price variable, a few very high offers stand out - including one priced at 16 million zlotys.

# For every categorical feature draw the price per square metre ("cena_metr")
# split by category.
for category in [
        "rynek", "ogrzewanie", "winda", "balkon", "ogrodek", "piwnica",
        "monitoring_ochrona", "stan_wykonczenia", "teren_zamkniety"
]:
    fig = plt.figure(figsize=(16, 8))
    sns.boxplot(y=df_with_localisation_cleaned["cena_metr"],
                x=df_with_localisation_cleaned[category])
    plt.plot()
Esempio n. 21
0
    "count": vencedor.values
})
# Bar chart of the winner counts held in df_vencedor.
sns.barplot(x="winner", y="count",
            data=df_vencedor).set_title("Distribuição dos ganhadores por lado")

# The five weight classes with the most rows, largest first.
categoria = df.groupby('weight_class')['weight_class'].count().sort_values(
    ascending=False)[0:5]
df_categoria = pd.DataFrame({
    'weight_class': categoria.index,
    "count": categoria.values
})
sns.barplot(
    x='weight_class', y="count",
    data=df_categoria).set_title("Distribuição dos lutadores por categoria")

# Histogram of the number of rounds per fight.
sns.histplot(df["no_of_rounds"]).set_title('Distribuição das lutas por rounds')

# Names of the winning fighter for red-corner and blue-corner wins.
vermelhos = df[df["Winner"] == "Red"]["R_fighter"]
azuis = df[df["Winner"] == "Blue"]["B_fighter"]
# NOTE(review): the "count" column below is filled with the row index of each
# fight, not with a number of wins -- confirm this is what the chart titled
# "menos vitórias" is meant to show.
df_vermelho = pd.DataFrame({
    "count": vermelhos.index,
    "winner": vermelhos.values
})
df_azul = pd.DataFrame({"count": azuis.index, "winner": azuis.values})
# Blue-corner winners first, then red-corner winners; head(5) below takes the
# first five rows in that order without sorting.
winners = pd.concat([df_azul, df_vermelho])
sns.barplot(
    x="winner", y="count",
    data=winners.head(5)).set_title("Cinco jogadores com menos vitórias")

df_vermelhos = pd.DataFrame({
    "count": vermelhos.index,
Esempio n. 22
0
    """
    n = len(arr)  # sample sizes
    s2 = np.var(arr, ddof=1)  # sample variance
    df = n - 1  # degrees of freedom

    upper = (n - 1) * s2 / stats.chi2.ppf((1 - gamma) / 2, df)
    lower = (n - 1) * s2 / stats.chi2.ppf(1 - (1 - gamma) / 2, df)

    return lower, upper


if __name__ == '__main__':
    # Draw one large standard-normal population, then for every sample size
    # in N (defined elsewhere in this module) draw a sample from it, save its
    # histogram and append its summary statistics to the task-1 report.

    population = stats.norm.rvs(loc=0.0, scale=1.0, size=1000000)

    for idx, sample_size in enumerate(N):

        # np.random.choice samples with replacement by default.
        sample = np.random.choice(a=population, size=sample_size)
        sns.histplot(x=sample, kde=True, color='orange')
        plt.savefig(f'../lab1/output/images/output_task1_{idx}.png',
                    bbox_inches='tight')
        plt.close()

        # The value labelled "Variance" is the unbiased sample variance
        # (ddof=1); it was previously computed with np.std, i.e. the
        # standard deviation.
        with open('output/output_task1.txt', 'a+') as txt:
            txt.write(
                f"Sample_size = {sample_size}: "
                f"Mean = {np.mean(sample)}, Variance = {np.var(sample, ddof=1)}\n"
                f"A. {task_a(sample)}\n"
                f"B. {task_b(sample)}\n"
                f"C. {task_c(sample)}\n\n")
Esempio n. 23
0
        # determine feature importances
        clf = sklearn.ensemble.RandomForestClassifier(n_estimators=100, n_jobs=args.n_jobs)
        clf.fit(X, y)

        scores = clf.feature_importances_

        # use a percentile threshold if specified
        if args.threshold != -1:
            threshold = np.percentile(scores, args.threshold)

        # otherwise compute threshold automatically
        else:
            threshold = compute_threshold(genes, scores)

        # select candidate genes
        candidate_genes = [gene for i, gene in enumerate(genes) if scores[i] > threshold]

        # plot distribution of gene scores
        if args.visualize:
            sns.histplot(scores, kde=True)
            ymin, ymax = plt.gca().get_ylim()
            y = [ymin, ymax / 2]
            plt.plot([threshold, threshold], y, 'r')
            plt.title(name)
            plt.tight_layout()
            plt.savefig('%s/%s-rf-candidate-threshold.png' % (args.output_dir, name))
            plt.close()

        # save results to output file
        outfile.write('\t'.join([name] + candidate_genes) + '\n')
Esempio n. 24
0
 def _plot_prior_posterior(self, prior_sample, posterior_sample, label):
     """Overlay histograms (with KDE) of the prior and posterior samples.

     Both samples are stacked into one long-form frame with a ``type`` column
     ('prior' / 'posterior') that drives the hue. ``label`` becomes the
     y-axis label; the x-axis label is left empty.
     """
     tagged_samples = (('prior', prior_sample), ('posterior', posterior_sample))
     frames = [
         pd.DataFrame({'value': sample, 'type': kind})
         for kind, sample in tagged_samples
     ]
     axes = sns.histplot(data=pd.concat(frames),
                         x='value',
                         hue='type',
                         kde=True)
     axes.set(xlabel='', ylabel=label)
Esempio n. 25
0
# NOTE(review): `dict` here is a user variable that shadows the builtin; it is
# indexed by the same keys as the columns of df -- presumably question number
# to question text, confirm.
dict[14], df[14]

"""<a name='a'></a>
# 3. Fixed Entry Feature Investigation

Picking columns with multiple choice / yes-no answers to compare with memory results

ENTER QUESTION NUMBER HERE
"""

# Column (question number) under investigation in the cells below.
k = 17

#-------------
dict[k]

# Distribution of the answers in column k.
sns.histplot(df[k])

"""ENTER CUTOFF HERE"""

cutoff1 = 1

"""-------------------------------------"""

#df[[k, 30, 31, 32, 33]]

# Mean of columns 30-33 for each distinct answer in column k.
df[[k, 30, 31, 32, 33]].groupby([k]).mean()

# Number of non-null values in column 30 for each distinct answer in column k.
df[[k, 30]].groupby([k]).count()


Esempio n. 26
0
#             open('%s_N%i.pickle' % (dataset, N), 'wb'))
# Then reload like this
# dp = pickle.load(file_name)
# locals().update(dp)

# plots
#######
savefigs = True  # do you want to save figures as pdfs
plt.style.use('ggplot')
pal = sb.dark_palette('white', n_colors=2)

# Compare standard and path sampling estimates of the log-normalising cst
# (histogram of the difference between the last logLts value and the
# 'path_sampling' estimate, over all results of type 'tempering')
plt.figure()
diff_est = [(r['out'].logLts[-1] - r['path_sampling']) for r in results
            if r['type'] == 'tempering']
sb.histplot(diff_est)

# Figure 17.1: typical behaviour of IBIS
# (first result of type 'ibis' whose K equals typK)
typ_ibis = [r for r in results if r['type'] == 'ibis' and r['K'] == typK][0]
typ_ess = typ_ibis['out'].ESSs
# Indices at which rs_flags is non-zero.
typ_rs_times = np.nonzero(typ_ibis['out'].rs_flags)[0]

# Left panel: evolution of ESS
fig, ax = plt.subplots()
ax.plot(typ_ess, 'k')
ax.set(xlabel=r'$t$', ylabel='ESS')
if savefigs:
    plt.savefig(dataset_name + '_typical_ibis_ess.pdf')

# Right panel: evolution of resampling times
fig, ax = plt.subplots()
Esempio n. 27
0
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

data = pd.read_csv('input_data.csv')
td = data[' Total Discharges ']

#1-(a)
# Density histogram with KDE overlay (replacement for the deprecated
# distplot), followed by a plain count histogram.
sb.histplot(td, kde=True, stat='density', kde_kws=dict(cut=3))
plt.show()
sb.histplot(td)
plt.show()

# The monetary columns are read as strings; drop the first character of every
# value and convert the remainder to float.
# NOTE(review): the first character is presumably a currency symbol -- confirm.
for money_col in (' Average Covered Charges ',
                  ' Average Total Payments ',
                  'Average Medicare Payments'):
    data[money_col] = data[money_col].str[1:].astype('float')

#1-(b)
sb.histplot(data[' Average Covered Charges '],
            kde=True,
            stat='density',
            kde_kws=dict(cut=3))
plt.show()
sb.histplot(data[' Average Covered Charges '])
plt.show()
#1-(c)
# Total payments against Medicare payments.
plt.scatter(data[' Average Total Payments '], data['Average Medicare Payments'])
plt.xlabel('Average Total Payments')
plt.ylabel('Average Medicare Payments')
plt.show()
#1-(d)
Esempio n. 28
0
Sofa_score


# In[ ]:


# Copy the score into an upper-case 'SOFA' column, then remove the original
# 'sofa' column in place
sofa_score['SOFA'] = sofa_score['sofa']
sofa_score.drop(columns='sofa', inplace=True)
sofa_score.head()


# In[ ]:


# Histogram of the SOFA column on a 10x10 figure
plt.figure(figsize=(10, 10))
ax = sns.histplot(data=sofa_score, x='SOFA')
ax.set_title('Histogram Plot For Sofa Score')


# In[ ]:


# Load the explicit_sepsis table, then keep the per-subject maximum of each
# of the three flag columns
explicit_sepsis_sql = query_schema + 'select * from explicit_sepsis'
df_expls = pd.read_sql(explicit_sepsis_sql, con)
sepsis_flags = ['severe_sepsis', 'septic_shock', 'sepsis']
df_expls = df_expls.groupby('subject_id')[sepsis_flags].max()
# Column totals (presumably the number of flagged subjects if the flags are
# 0/1 - confirm against the table)
df_expls.sum()


# In[ ]:


Esempio n. 29
0
def histogram_unweighted_team_compositions(team_sizes):
    """Show two histograms built from the per-team coder counts.

    The first histogram is of the ``'twers'`` column converted to ``int``.
    The second is of the ratio ``'nontwers'`` / ``'twers'`` (both converted
    to ``int``). Each figure is displayed with ``plt.show()`` before the
    next one is drawn.

    Args:
        team_sizes: table indexable by ``'twers'`` and ``'nontwers'``, each
            yielding a column that supports ``.apply(int)``.
    """
    tw_counts = team_sizes['twers'].apply(int)
    tw_axes = sns.histplot(data=tw_counts)
    tw_axes.set(xlabel='number of TW coders on the team', ylabel='number of teams')
    plt.show()

    non_tw_counts = team_sizes['nontwers'].apply(int)
    ratio = non_tw_counts / tw_counts
    ratio_axes = sns.histplot(data=ratio)
    ratio_axes.set(
        xlabel='ratio of nonthoughtworks coders to TW coders (nonTWers/TWers)',
        ylabel='number of teams',
    )
    plt.show()
Esempio n. 30
0
# Box plots of the first continuous feature per target value, coloured in
# turn by nominal features 2, 1, 3, 4, 5 and 6 (same order as drawn before)
target_col = data['target']
cont_feature = data[quantitative['cont'][0]]
nominal_names = qualitative['nominal']
for nominal_idx in (2, 1, 3, 4, 5, 6):
    sns.boxplot(x=target_col, y=cont_feature, hue=data[nominal_names[nominal_idx]])

# Same kind of plot for the first discrete feature, coloured by nominal feature 2
discrete_feature = data[quantitative['discrete'][0]]
sns.boxplot(x=target_col, y=discrete_feature, hue=data[nominal_names[2]])

# Distribution of the first discrete feature in bins of width 10:
# a cumulative probability polygon, then a plain count histogram
sns.histplot(
    discrete_feature,
    binwidth=10,
    cumulative=True,
    element='poly',
    alpha=0.3,
    stat='probability',
)
sns.histplot(discrete_feature, binwidth=10, cumulative=False, stat='count')

# Mine relations between various quantitative features visually


def get_index(feature):
    """Return the index of ``feature.value_counts()``.

    Args:
        feature: object exposing ``value_counts()`` (e.g. a pandas Series).

    Returns:
        The ``.index`` of the value-count result, i.e. the distinct values
        in the order ``value_counts()`` reports them.
    """
    counts = feature.value_counts()
    return counts.index
    
# Count plots for nominal features 1 through 4.
# The column is passed explicitly as ``x=``: since seaborn 0.12 the only
# positional parameter of countplot is ``data``, so a positionally passed
# Series is no longer interpreted as the x variable (seaborn 0.11 already
# warned about this usage).
for nominal_idx in range(1, 5):
    sns.countplot(x=data[qualitative['nominal'][nominal_idx]])