Ejemplos de PCA en Python, ejemplos de prince.PCA en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: ACP.py Proyecto: robertorenesc/promidat_ml

 def __init__(self, datos, n_componentes=5):
     """Fit a PCA on ``datos`` and cache the results derived from the model.

     Args:
         datos: Data set passed to ``PCA.fit`` and to each of the model's
             row/column helper methods.
         n_componentes: Value forwarded to ``PCA`` as ``n_components``.
     """
     self.__datos = datos
     modelo = PCA(n_components=n_componentes).fit(datos)
     self.__modelo = modelo
     # Per-column results.
     self.__correlacion_var = modelo.column_correlations(datos)
     # Per-row results.
     self.__coordenadas_ind = modelo.row_coordinates(datos)
     self.__contribucion_ind = modelo.row_contributions(datos)
     self.__cos2_ind = modelo.row_cosine_similarities(datos)
     # Explained inertia of every component, multiplied by 100.
     self.__var_explicada = [
         inercia * 100 for inercia in modelo.explained_inertia_
     ]

Ejemplo n.º 2

0

Mostrar archivo

Archivo: TickerTransform.py Proyecto: smalik/stock_price_prediction

    def get_pca(self, components: int = 3):
        """Fit a standardized PCA on ``self.df`` and collect its outputs.

        Args:
            components: Number of components, forwarded to ``PCA`` as
                ``n_components``.

        Returns:
            dict with three entries:
              - ``'fit'``: the object returned by ``pca.fit(data)``
              - ``'rotated'``: ``data`` transformed by the fitted model
              - ``'feature_correlations'``: the fitted model's
                ``column_correlations(data)``
        """
        data = self.df
        pca = PCA(n_components=components,
                  n_iter=100,
                  rescale_with_mean=True,
                  rescale_with_std=True,
                  copy=True,
                  check_input=True)

        # Fit once and reuse the fitted model. The original fitted a second
        # time through fit_transform() and then read the correlations from an
        # undefined name (`fit`), which raised NameError on every call.
        fitted = pca.fit(data)

        results = dict()
        results['fit'] = fitted
        results['rotated'] = fitted.transform(data)
        results['feature_correlations'] = fitted.column_correlations(data)

        return results

Ejemplo n.º 3

0

Mostrar archivo

Archivo: DFPCA.py Proyecto: Hann-THL/DATA_SCIENCE

class DFPCA(BaseEstimator, TransformerMixin):
    """DataFrame-in / DataFrame-out transformer wrapping ``PCA``.

    NOTE:
    - DFPCA(n_components=df.shape[1]) keeps every dimension
    - DFPCA(rescale_with_mean=False, rescale_with_std=False) avoids the
      built-in StandardScaler()
    """

    def __init__(self, columns=None, prefix='pca_', **kwargs):
        """Store the configuration and build the wrapped ``PCA(**kwargs)``.

        Args:
            columns: Columns to transform; ``None`` means "use all columns
                of the frame given to ``fit``".
            prefix: String prepended to every component column name.
            **kwargs: Forwarded unchanged to ``PCA``.
        """
        self.columns, self.prefix = columns, prefix
        self.model = PCA(**kwargs)
        # Both stay None until fit() has run.
        self.transform_cols = self.stat_df = None

    def fit(self, X, y=None):
        """Fit the wrapped model on the selected columns of ``X``.

        Also fills ``stat_df`` with one row per component (dimension number,
        eigenvalue, explained inertia and its cumulative sum).

        Returns:
            self
        """
        if self.columns is None:
            self.columns = X.columns
        # Keep the column order of X, restricted to the requested columns.
        self.transform_cols = [col for col in X.columns if col in self.columns]
        self.model.fit(X[self.transform_cols])

        # Reference: https://www.appliedaicourse.com/lecture/11/applied-machine-learning-online-course/2896/pca-for-dimensionality-reduction-not-visualization/0/free-videos
        eigenvalues = self.model.eigenvalues_
        inertia = self.model.explained_inertia_
        self.stat_df = pd.DataFrame({
            'dimension': list(range(1, len(eigenvalues) + 1)),
            'eigenvalues': eigenvalues,
            'explained_inertia': inertia,
            'cumsum_explained_inertia': np.cumsum(inertia)
        })

        return self

    def transform(self, X):
        """Replace the fitted columns of ``X`` with prefixed component columns.

        Raises:
            NotFittedError: if ``fit`` has not been called yet.
        """
        if self.transform_cols is None:
            raise NotFittedError(f"This {self.__class__.__name__} instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.")

        components = self.model.transform(X[self.transform_cols])
        prefixed = {col: f'{self.prefix}{col}' for col in components.columns}
        components.rename(columns=prefixed, inplace=True)

        untouched = X.drop(columns=self.transform_cols)
        return pd.concat([untouched, components], axis=1)

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return ``transform(X)``; ``y`` is not used."""
        fitted = self.fit(X)
        return fitted.transform(X)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: test_pca.py Proyecto: yushu-liu/Prince

    def setup_class(cls):
        """Load the decathlon data and build the shared fixtures on ``cls``.

        Sets ``df_categorical``, ``df_numeric``, ``n``, ``p``,
        ``center_reduced``, ``cov``, ``n_components`` and ``pca``.
        """
        decathlon = pd.read_csv('tests/data/decathlon.csv', index_col=0)

        # Split the frame into its non-numeric and numeric columns.
        categorical = decathlon.select_dtypes(exclude=[np.number])
        numeric = decathlon.drop(categorical.columns, axis='columns')
        cls.df_categorical = categorical
        cls.df_numeric = numeric

        # Shape of the numeric part.
        cls.n, cls.p = numeric.shape

        # Center and reduce every numeric column, then form X^T X.
        # The original comment calls this the covariance matrix; note that
        # the product is not divided by the number of rows here.
        working = numeric.copy()
        standardized = (working - working.mean()) / working.std()
        cls.center_reduced = standardized.values
        cls.cov = cls.center_reduced.T @ cls.center_reduced

        # Full PCA: one component per numeric column.
        cls.n_components = len(numeric.columns)
        cls.pca = PCA(decathlon, n_components=cls.n_components, scaled=True)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: covid19_predictor.py Proyecto: HackCovid19brasil/covidhelper

    def model_interpretation(self, patient_id, patient_preprocessed, pred,
                             prob, model):
        '''
        Build the evaluation plots for one prediction and return their URLs.

        Four PNG files are saved under ``app/ai_models/temp/``: the
        model-probability densities with the patient marked, a SHAP waterfall
        plot, the densities of four exam features with the patient marked, and
        a 2-component PCA scatter plot of the training data with the patient
        marked.

        Arguments:
            patient_id = patient identifier; its ``str()`` is embedded in the
                file names and in the returned URLs
            patient_preprocessed = dict with the patient's exam data
                (feature name -> value)
            pred = class predicted by the model; must compare equal to 0 or 1
            prob = probability associated with the predicted class
            model = model object handed to ``TreeExplainer``

        Returns:
            dict with the keys 'prediction', 'probability' (``prob * 100``
            rounded to 2 decimals, as a string), 'probacurve', 'shap_img',
            'dist_img' and 'mapa_img' (URLs of the four plots).

        Raises:
            ValueError: if ``pred`` is neither 0 nor 1.
        '''
        exame_resp = pred
        exame_prob = prob

        # Validate the predicted class before any figure is created. The
        # class selects which density curve (0 = first drawn, 1 = second
        # drawn) the patient marker is interpolated on, and where on the
        # probability axis it sits.
        if exame_resp == 0:
            curve_idx = 0
            prob_xi = 1 - exame_prob
        elif exame_resp == 1:
            curve_idx = 1
            prob_xi = exame_prob
        else:
            raise ValueError(f"pred must be 0 or 1, got {pred!r}")

        #### Plot file names
        plot_1_name = 'app/ai_models/temp/probacurve-' + str(
            patient_id) + '.png'
        plot_2_name = 'app/ai_models/temp/shap-' + str(patient_id) + '.png'
        plot_3_name = 'app/ai_models/temp/dist-' + str(patient_id) + '.png'
        plot_4_name = 'app/ai_models/temp/mapa-' + str(patient_id) + '.png'

        # API URLs of the plots
        media_base = "http://" + self.IP + ":" + self.API_PORT + "/api/media/"
        plot_1_api = media_base + "probacurve-" + str(patient_id) + ".png"
        plot_2_api = media_base + "shap-" + str(patient_id) + ".png"
        plot_3_api = media_base + "dist-" + str(patient_id) + ".png"
        plot_4_api = media_base + "mapa-" + str(patient_id) + ".png"

        #### General matplotlib settings
        DPI_IMAGES = 100
        FONT_SIZE = 8
        FONT_NAME = 'sans-serif'
        plt.rc('font', family=FONT_NAME, size=FONT_SIZE)
        plt.rc('axes', titlesize=FONT_SIZE, labelsize=FONT_SIZE)
        plt.rc('xtick', labelsize=FONT_SIZE)
        plt.rc('ytick', labelsize=FONT_SIZE)
        plt.rc('legend', fontsize=FONT_SIZE)

        #### PLOT 1 - Distribution of the model probability
        # Needs: self.probs_df (imported csv) and pred
        fig, axis = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
        sns.kdeplot(self.probs_df['prob_neg'],
                    shade=True,
                    color='#386796',
                    ax=axis,
                    linestyle="--",
                    label='Casos Negativos')
        sns.kdeplot(self.probs_df['prob_pos'],
                    shade=True,
                    color='#F06C61',
                    ax=axis,
                    label='Casos positivos')
        # Read the XY data of the selected curve and interpolate the patient
        xi = prob_xi
        data_x, data_y = axis.lines[curve_idx].get_data()
        yi = np.interp(xi, data_x, data_y)
        axis.plot([xi], [yi],
                  linestyle='None',
                  marker="*",
                  color='black',
                  markersize=10,
                  label='Paciente')
        # Remaining plot settings
        axis.legend(loc="upper right")
        axis.set_xlim([0, 1])
        axis.set_ylim([0, axis.get_ylim()[1]])
        plt.tight_layout()
        # Save plot 1
        plt.savefig(plot_1_name,
                    dpi=DPI_IMAGES,
                    bbox_inches='tight',
                    pad_inches=0.1)
        plt.close()

        #### PLOT 2 - SHAP
        # Needs: patient_preprocessed, pred and model
        features = np.array(list(patient_preprocessed.keys()))
        sample_x = np.array(list(patient_preprocessed.values()))
        explainer = TreeExplainer(model=model)
        # Computed once; the original ran this identical call twice.
        shap_values_sample = explainer.shap_values(sample_x)
        # Baseline for the class predicted by the model
        expected_value = explainer.expected_value[exame_resp]
        waterfall_plot(expected_value,
                       shap_values_sample[exame_resp],
                       sample_x,
                       feature_names=features,
                       max_display=20,
                       show=False)
        # Save image
        plt.tight_layout()
        plt.savefig(plot_2_name,
                    dpi=DPI_IMAGES,
                    bbox_inches='tight',
                    pad_inches=0)
        plt.close()

        #### PLOT 3 - Distribution of four exam features
        # Needs: self.train_df (imported csv), patient_preprocessed, pred
        important_features = [
            'Leucócitos', 'Plaquetas', 'Hemácias', 'Eosinófilos'
        ]
        target_0 = self.train_df[self.train_df['target'] ==
                                 0][important_features]
        target_1 = self.train_df[self.train_df['target'] ==
                                 1][important_features]
        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))
        # axes.flat walks the 2x2 grid row by row, the same order the
        # original produced with its manual row/column counters.
        for feat, feat_ax in zip(important_features, axes.flat):
            sns.kdeplot(list(target_0[feat]),
                        shade=True,
                        color='#386796',
                        ax=feat_ax,
                        label='Casos Negativos',
                        linestyle="--")
            sns.kdeplot(list(target_1[feat]),
                        shade=True,
                        color='#F06C61',
                        ax=feat_ax,
                        label='Casos positivos')
            # Density curve that matches the model's prediction
            data_x, data_y = feat_ax.lines[curve_idx].get_data()
            # Patient's value for this feature, placed on that curve
            xi = patient_preprocessed[feat]
            yi = np.interp(xi, data_x, data_y)
            feat_ax.plot([xi], [yi],
                         linestyle='None',
                         marker="*",
                         color='black',
                         markersize=10,
                         label='Paciente')
            feat_ax.set_title(feat)
            feat_ax.legend(loc="upper right")
            feat_ax.set_ylim([0, feat_ax.get_ylim()[1]])
        plt.tight_layout()
        # Save image
        plt.savefig(plot_3_name,
                    dpi=DPI_IMAGES,
                    bbox_inches='tight',
                    pad_inches=0.1)
        plt.close()

        #### PLOT 4 - Patient map built from a 2-component PCA
        # Needs: train_df (imported csv), patient_preprocessed
        amostra = pd.DataFrame(patient_preprocessed, index=[
            0,
        ]).drop(axis=1, columns=['Outra gripe'])

        # PCA via the prince package
        y_train = self.train_df['target']  # Keep the target column
        dados = self.train_df.drop(
            axis=1, columns=['Outra gripe',
                             'target']).copy()  # Data set used for the map
        pca_obj = PCA(n_components=2, random_state=42)
        pca_obj.fit(dados)
        componentes = pca_obj.transform(
            dados)  # Principal components of the training data
        transf = pca_obj.transform(amostra)  # Patient in PCA space
        xi = transf.loc[0, 0]  # Patient X coordinate
        yi = transf.loc[0, 1]  # Patient Y coordinate
        comp = pd.DataFrame()  # Holds the components plus the target
        comp['C1'] = componentes[0]  # Principal component 1
        comp['C2'] = componentes[1]  # Principal component 2
        comp['TG'] = y_train  # Target, used as the mask below
        comp_0 = comp[comp['TG'] == 0][['C1', 'C2']]  # Negatives
        comp_1 = comp[comp['TG'] == 1][['C1', 'C2']]  # Positives
        # Plot
        fig, ax = plt.subplots(figsize=(8, 8))
        plt.margins(0, 0)
        sns.scatterplot(ax=ax,
                        data=comp_0,
                        x='C1',
                        y='C2',
                        color='#386796',
                        label='Casos Negativos')
        sns.scatterplot(ax=ax,
                        data=comp_1,
                        x='C1',
                        y='C2',
                        color='#F06C61',
                        label='Casos Positivos')
        x_mean, y_mean, width, height, angle = self.build_ellipse(
            comp_0['C1'], comp_0['C2'])
        ax.add_patch(
            Ellipse((x_mean, y_mean),
                    width,
                    height,
                    angle=angle,
                    linewidth=2,
                    color='#386796',
                    fill=True,
                    alpha=0.2))
        x_mean, y_mean, width, height, angle = self.build_ellipse(
            comp_1['C1'], comp_1['C2'])
        ax.add_patch(
            Ellipse((x_mean, y_mean),
                    width,
                    height,
                    angle=angle,
                    linewidth=2,
                    color='#F06C61',
                    fill=True,
                    alpha=0.2))
        ax.plot([xi], [yi],
                linestyle='None',
                marker="*",
                color='black',
                markersize=10,
                label='Paciente')
        # Plot settings
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel('')
        ax.set_xlabel('')
        # Legend entries sorted alphabetically by label
        handles, labels = ax.get_legend_handles_labels()
        labels, handles = zip(
            *sorted(zip(labels, handles), key=lambda t: t[0]))
        ax.legend(handles, labels, loc="upper right")
        # Save image
        plt.axis('off')
        plt.savefig(plot_4_name,
                    dpi=DPI_IMAGES,
                    bbox_inches='tight',
                    pad_inches=0)
        plt.close()

        # Result handed back to the caller
        model_result = {
            'prediction': pred,
            'probability': str(round(prob * 100, 2)),
            'probacurve': plot_1_api,
            'shap_img': plot_2_api,
            'dist_img': plot_3_api,
            'mapa_img': plot_4_api
        }
        return model_result
        """

Ejemplo n.º 6

0

Mostrar archivo

def pca(load_df, k):
    """Return the executed PCA: ``load_df`` scaled, with ``k`` components."""
    executed = PCA(load_df, n_components=k, scaled=True)
    return executed

Ejemplo n.º 7

0

Mostrar archivo

Archivo: ACP.py Proyecto: robertorenesc/promidat_ml

class ACP:
    """PCA wrapper that caches the model outputs and draws the usual plots.

    The constructor fits ``PCA`` on the data and stores the column
    correlations, row coordinates, row contributions, row cosine similarities
    and the explained inertia multiplied by 100. The three ``plot_*`` methods
    draw with ``matplotlib.pyplot`` on a new 13x13 figure; none of them calls
    ``plt.show()`` or returns a value.

    NOTE(review): the style name 'seaborn-whitegrid' was renamed in newer
    Matplotlib releases — confirm it exists in the installed version.
    """

    def __init__(self, datos, n_componentes = 5):
        """Fit the PCA on ``datos`` with ``n_componentes`` components."""
        self.__datos = datos
        self.__modelo = PCA(n_components = n_componentes).fit(self.__datos)
        self.__correlacion_var = self.__modelo.column_correlations(datos)
        self.__coordenadas_ind = self.__modelo.row_coordinates(datos)
        self.__contribucion_ind = self.__modelo.row_contributions(datos)
        self.__cos2_ind = self.__modelo.row_cosine_similarities(datos)
        self.__var_explicada = [x * 100 for x in self.__modelo.explained_inertia_]

    @property
    def datos(self):
        return self.__datos

    @datos.setter
    def datos(self, datos):
        # Only replaces the stored data; the model is not refitted.
        self.__datos = datos

    @property
    def modelo(self):
        return self.__modelo

    @property
    def correlacion_var(self):
        return self.__correlacion_var

    @property
    def coordenadas_ind(self):
        return self.__coordenadas_ind

    @property
    def contribucion_ind(self):
        return self.__contribucion_ind

    @property
    def cos2_ind(self):
        return self.__cos2_ind

    @property
    def var_explicada(self):
        return self.__var_explicada

    @var_explicada.setter
    def var_explicada(self, var_explicada):
        self.__var_explicada = var_explicada

    @staticmethod
    def _escala_sobreposicion(x, y, cor_x, cor_y):
        """Return the factor that stretches the arrows onto the row cloud.

        For each axis the range of the row coordinates is divided by the range
        of the column correlations; the smaller ratio, times 0.7, is returned.
        The original expression was missing parentheses around
        ``max - min``, so only ``min`` was being divided.
        """
        razon_x = (max(x) - min(x)) / (max(cor_x) - min(cor_x))
        razon_y = (max(y) - min(y)) / (max(cor_y) - min(cor_y))
        return min(razon_x, razon_y) * 0.7

    def _etiquetar_ejes(self, ejes):
        """Label both axes with the component index and its inertia in %."""
        inercia_x = round(self.var_explicada[ejes[0]], 2)
        inercia_y = round(self.var_explicada[ejes[1]], 2)
        plt.xlabel('Componente ' + str(ejes[0]) + ' (' + str(inercia_x) + '%)')
        plt.ylabel('Componente ' + str(ejes[1]) + ' (' + str(inercia_y) + '%)')

    def plot_plano_principal(self, ejes = None, ind_labels = True, titulo = 'Plano Principal'):
        """Scatter the rows on the two components selected by ``ejes``.

        Args:
            ejes: Pair of component indices; ``None`` means ``[0, 1]``.
            ind_labels: Annotate every point with its row index label.
            titulo: Figure title.
        """
        ejes = [0, 1] if ejes is None else ejes
        x = self.coordenadas_ind[ejes[0]].values
        y = self.coordenadas_ind[ejes[1]].values
        plt.subplots(figsize=(13, 13))
        plt.style.use('seaborn-whitegrid')
        plt.scatter(x, y, color = 'gray')
        plt.title(titulo)
        plt.axhline(y = 0, color = 'dimgrey', linestyle = '--')
        plt.axvline(x = 0, color = 'dimgrey', linestyle = '--')
        self._etiquetar_ejes(ejes)
        if ind_labels:
            for i, txt in enumerate(self.coordenadas_ind.index):
                plt.annotate(txt, (x[i], y[i]))

    def plot_circulo(self, ejes = None, var_labels = True, titulo = 'Círculo de Correlación'):
        """Draw the unit circle with one arrow per column.

        Args:
            ejes: Pair of component indices; ``None`` means ``[0, 1]``.
            var_labels: Write the column name next to each arrow head.
            titulo: Figure title.
        """
        ejes = [0, 1] if ejes is None else ejes
        cor = self.correlacion_var.iloc[:, ejes].values
        plt.style.use('seaborn-whitegrid')
        c = plt.Circle((0, 0), radius = 1, color = 'steelblue', fill = False)
        plt.subplots(figsize=(13, 13))
        plt.gca().add_patch(c)
        plt.axis('scaled')
        plt.title(titulo)
        plt.axhline(y = 0, color = 'dimgrey', linestyle = '--')
        plt.axvline(x = 0, color = 'dimgrey', linestyle = '--')
        self._etiquetar_ejes(ejes)
        for i in range(cor.shape[0]):
            plt.arrow(0, 0, cor[i, 0] * 0.95, cor[i, 1] * 0.95, color = 'steelblue',
                      alpha = 0.5, head_width = 0.05, head_length = 0.05)
            if var_labels:
                plt.text(cor[i, 0] * 1.05, cor[i, 1] * 1.05, self.correlacion_var.index[i],
                         color = 'steelblue', ha = 'center', va = 'center')

    def plot_sobreposicion(self, ejes = None, ind_labels = True,
                      var_labels = True, titulo = 'Sobreposición Plano-Círculo'):
        """Overlay the row scatter and the rescaled column arrows.

        Args:
            ejes: Pair of component indices; ``None`` means ``[0, 1]``.
            ind_labels: Annotate every point with its row index label.
            var_labels: Write the column name next to each arrow head.
            titulo: Accepted for symmetry with the other plots; as in the
                original, this method does not draw a title.
        """
        ejes = [0, 1] if ejes is None else ejes
        x = self.coordenadas_ind[ejes[0]].values
        y = self.coordenadas_ind[ejes[1]].values
        cor = self.correlacion_var.iloc[:, ejes].values
        # Columns 0 and 1 of `cor` are the two selected components, the same
        # columns the arrows below are drawn from.
        scale = self._escala_sobreposicion(x, y, cor[:, 0], cor[:, 1])
        plt.subplots(figsize=(13, 13))
        plt.style.use('seaborn-whitegrid')
        plt.axhline(y = 0, color = 'dimgrey', linestyle = '--')
        plt.axvline(x = 0, color = 'dimgrey', linestyle = '--')
        self._etiquetar_ejes(ejes)
        plt.scatter(x, y, color = 'gray')
        if ind_labels:
            for i, txt in enumerate(self.coordenadas_ind.index):
                plt.annotate(txt, (x[i], y[i]))
        for i in range(cor.shape[0]):
            plt.arrow(0, 0, cor[i, 0] * scale, cor[i, 1] * scale, color = 'steelblue',
                      alpha = 0.5, head_width = 0.05, head_length = 0.05)
            if var_labels:
                plt.text(cor[i, 0] * scale * 1.15, cor[i, 1] * scale * 1.15,
                         self.correlacion_var.index[i],
                         color = 'steelblue', ha = 'center', va = 'center')

Ejemplo n.º 8

0

Mostrar archivo

def run():
    """Run the drafter-similarity analysis from the command line.

    Steps, in order:
      1. Read the choice CSV files in ``args.files`` (two header rows) and
         stack each drafter's '1st' and '2nd' columns into one column.
      2. Compute the pairwise fraction of equal choices and write it to
         ``similarities.pkl`` / ``similarities.csv`` inside ``args.path``.
      3. Map the choices to numeric points and project the drafters with PCA
         onto ``args.dimensions`` components.
      4. Cluster the projection with K-Means for every k from 2 up to
         ``len(drafters) - 1`` and keep the clustering with the best
         silhouette score.
      5. Save and show a 2D or 3D scatter plot coloured by cluster.
    """
    # read command line arguments
    arg_parser = get_arg_parser()
    args = arg_parser.parse_args()

    # create output folder if it doesn't exist
    os.makedirs(args.path, exist_ok=True)

    print("Reading data...")

    # read all csv choices files
    dfs = [pd.read_csv(file, header=[0, 1]) for file in args.files]

    # if more than one file was read, concat them
    if len(dfs) > 1:
        # concat their columns
        choices = pd.concat(dfs, axis=1, join='inner')

        # remove duplicate columns
        choices = choices.loc[:, ~choices.columns.duplicated()]
    else:
        choices = dfs[0]

    # remove columns that will not be used
    choices.drop(columns=['drafter', 'entropy'], level=0, inplace=True)

    # get list of drafters from remaining columns
    drafters = list(choices.columns.get_level_values(0).unique())

    print("Processing data...")

    # concat 1st and 2nd players' choices into a new dataframe
    temp = pd.DataFrame(index=range(len(choices.index) * 2), columns=drafters)
    for drafter in drafters:
        drafter_columns = [
            choices[(drafter, '1st')], choices[(drafter, '2nd')]
        ]

        temp[drafter] = pd.concat(drafter_columns, ignore_index=True)

    # discard original dataframe in favor of the new one
    choices = temp

    print("Calculating similarities...")

    # initialize the similarities dataframe
    similarities = pd.DataFrame(index=drafters, columns=drafters)

    # populate the similarities dataframe
    # Cells are written with .loc: the original chained form
    # `similarities[a][b] = v` assigns into an intermediate Series and is
    # silently lost when pandas Copy-on-Write is active.
    total_rows = len(choices.index)
    for drafter1 in drafters:
        # the similarity of a drafter and itself is of 100%
        similarities.loc[drafter1, drafter1] = 1.0

        for drafter2 in drafters:
            if drafter1 == drafter2:
                continue

            # calculate amount of equal choices
            equal_rows = (choices[drafter1] == choices[drafter2]).sum()

            # calculate similarity
            similarity = equal_rows / total_rows

            # update both symmetric cells in the dataframe
            similarities.loc[drafter1, drafter2] = similarity
            similarities.loc[drafter2, drafter1] = similarity

    # save similarities dataframe to files
    similarities.to_pickle(os.path.join(args.path, 'similarities.pkl'))
    similarities.to_csv(os.path.join(args.path, 'similarities.csv'))

    print("Applying PCA...")

    # create mapping between choices and points in a circumference
    # NOTE(review): math.sin/math.cos take radians, so 30/120/210 are read as
    # radians here, not degrees — confirm this is the intended placement
    # before changing the values (doing so changes the PCA input).
    choices_to_points = {
        0: math.sin(30),
        1: math.sin(120),
        2: math.sin(210),
        3: math.cos(30),
        4: math.cos(120),
        5: math.cos(210)
    }

    # double the amount of rows to store the points' x and y
    # (the shifted copy, choices + 3, selects the cos entries above)
    choices = pd.concat([choices, choices + 3])

    # map choices to points
    choices = choices.applymap(choices_to_points.__getitem__)

    # apply PCA down to 2 or 3 dimensions
    pca = PCA(n_components=args.dimensions, random_state=824)
    coords = pca.fit_transform(choices.T)
    coords.columns = ['x', 'y', 'z'] if args.dimensions == 3 else ['x', 'y']

    print("Applying k-means...")

    # apply K-Means to the PCA coordinates, finding the optimal value of k
    # with the average silhouette method
    silhouettes, clusterings = [], []

    for k in range(2, len(drafters)):
        print(f"Trying k={k}", end="")

        kmeans = KMeans(n_clusters=k, random_state=824).fit(coords)
        silhouette = silhouette_score(coords, kmeans.labels_, random_state=824)

        silhouettes.append(silhouette)
        clusterings.append(kmeans.labels_)
        print(f", silhouette={silhouette}, labels={kmeans.labels_}")

    # index 0 of the lists corresponds to k=2
    best_k = np.argmax(silhouettes) + 2

    labels = clusterings[best_k - 2]
    print(f"Best k: {best_k}. Labels: {labels}")

    # color the drafters according to their cluster
    all_colors = [
        'tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple',
        'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan'
    ]
    # wrap around the palette so more than 10 clusters cannot raise IndexError
    coords['color'] = [
        all_colors[cluster_id % len(all_colors)] for cluster_id in labels
    ]

    print("All done.")

    # rename agents
    for i, full_name in enumerate(drafters):
        tokens = full_name.split('/')

        # tokens[-3] and tokens[-2] are read below, so at least three path
        # parts are required; shorter names are left unchanged
        if len(tokens) >= 3:
            battler = {'max-attack': 'MA', 'greedy': 'GR'}[tokens[-3]]
            drafter = tokens[-2]

            drafters[i] = f"{drafter}/{battler}"

    print("columns")
    print(coords.columns)

    # normalize the axes (every column except the trailing 'color')
    for axis in coords.columns[:-1]:
        coords[axis] -= coords[axis].min()
        coords[axis] /= coords[axis].max()

    print(coords)

    if args.dimensions == 3:
        # plot the PCA coordinates in 3D
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        objs = []

        for name, x, y, z, color in coords.itertuples():
            objs.append(ax.scatter(x, y, z, marker='o', c=color, label=name))

        plt.legend(objs, drafters, ncol=3, fontsize=8, loc='upper left')

        ax.set_xlabel('X Label')
        ax.set_ylabel('Y Label')
        ax.set_zlabel('Z Label')
    else:
        # plot the PCA coordinates in 2D
        plt.subplots_adjust(bottom=0.1)

        for name, x, y, color in coords.itertuples():
            plt.scatter(x, y, marker='o', label=name, c=color)

        for label, x, y in zip(drafters, coords['x'], coords['y']):
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(-20, 20),
                         textcoords='offset points',
                         ha='right',
                         va='bottom',
                         bbox=dict(boxstyle='round,pad=0.5',
                                   fc='yellow',
                                   alpha=0.5),
                         arrowprops=dict(arrowstyle='->',
                                         connectionstyle='arc3,rad=0'))

    plt.savefig(args.path +
                f'/similarities{"3D" if args.dimensions == 3 else ""}.png')

    plt.show()

    print("✅")

Ejemplo n.º 9

0

Mostrar archivo

Archivo: DFPCA.py Proyecto: Hann-THL/DATA_SCIENCE

 def __init__(self, columns=None, prefix='pca_', **kwargs):
     """Store the configuration and build the wrapped ``PCA(**kwargs)``.

     Args:
         columns: Stored as ``self.columns``.
         prefix: Stored as ``self.prefix``.
         **kwargs: Forwarded unchanged to ``PCA``.
     """
     self.columns, self.prefix = columns, prefix
     self.model = PCA(**kwargs)
     # Both attributes start out as None.
     self.transform_cols = self.stat_df = None