# ### Biplot
# 
# A scatterplot projected onto the first two principal components.

# In[10]:

# Start a new figure before drawing (plt is presumably matplotlib.pyplot).
plt.figure()

# Wrap the scaled values in a DataFrame that reuses the column labels of df,
# so individual variables can be looked up by name.
data_scaled = pd.DataFrame(data=_scaled, columns=df.columns)

# Points are coloured by the PartyID column of the scaled data.
# NOTE(review): the title says "Biplot" although triplot() is the function
# being called -- confirm that this label is intended.
triplot(
    pca,
    data_scaled,
    title='ANES 2012 Biplot',
    color=data_scaled['PartyID'],
)


# In[11]:

# Draw the biplot for the same pca object and scaled data, with each point
# coloured by the PartyID column.
biplot(
    pca,
    data_scaled,
    title='ANES 2012 Biplot',
    color=data_scaled['PartyID'],
)


# All of the original axes have negative loadings on the first principal component. That's okay! To quote Dr. Eric Larson:
# > Because all the data is somewhat correlated, giving a mostly unidimensional representation. Positive/negative isn't so important because eigenvectors could theoretically start anywhere--but traditionally we use the origin.
# 
# **Update:** The demographic factor of education level has a different sign from the others.

# In[12]:

def fpc_ordered(corr):
    """Reorder correlation matrix based on first principal component (FPC)."""
    ew, ev = np.linalg.eig(corr)
    idx = np.argsort(ew)[::-1]  # Reordering index of eigenvalues
    ew, ev = ew[idx], ev[:, idx]
    e1 = ev[:, 0]
Beispiel #2
0
# ### Biplot
#
# A scatterplot projected onto the first two principal components.

# In[12]:

# Wrap the scaled values in a DataFrame that reuses the column labels of df,
# so individual variables can be looked up by name.
data_scaled = pd.DataFrame(data=_scaled, columns=df.columns)

# The survey year in the title comes from the YEAR constant.
# NOTE(review): the title says "Biplot" although triplot() is the function
# being called -- confirm that this label is intended.
triplot(
    pca,
    data_scaled,
    title='ANES {} Biplot'.format(YEAR),
    color=data_scaled['PartyID'],
)

# In[13]:

# Draw the biplot for the same pca object and scaled data; the title carries
# the survey year from YEAR and points are coloured by the PartyID column.
biplot(
    pca,
    data_scaled,
    title='ANES {} Biplot'.format(YEAR),
    color=data_scaled['PartyID'],
)

# In[14]:

# Bare expression: as the last line of a notebook cell it displays the
# explained_variance_ attribute of pca; the value is not stored anywhere.
# Presumably pca is a fitted scikit-learn PCA at this point -- confirm.
pca.explained_variance_

# ## Dropping na

# In[15]:

# Keep only the rows of df that contain no missing values.
df2 = df.dropna(axis=0, how='any')

# Imputation is switched off in this variant; the disabled step was:
#imp = Imputer(strategy='mean')

# Fresh estimator objects. Note that this rebinds the name pca, so the
# object used by the plots above is no longer reachable through it.
scl = StandardScaler()
pca = PCA()
pipeline = Pipeline([
Beispiel #3
0
    for df in DATA_FRAMES[1:]
]
# For every data frame except the first: select the variables shared across
# all years, run them through scaler_pipeline, and wrap the result in a
# DataFrame that carries those variable names as column labels.
scaled = [
    pd.DataFrame(
        scaler_pipeline.transform(df[VARIABLES_CONSISTENT_ACROSS_ALL_YEARS]),
        columns=VARIABLES_CONSISTENT_ACROSS_ALL_YEARS,
    )
    for df in DATA_FRAMES[1:]
]

# In[9]:

# Draw one biplot per (scaled data frame, year) pair; zip stops at the
# shorter of scaled and YEARS[1:].
# NOTE(review): the loop target rebinds the top-level name df, so after this
# cell df refers to the last frame in scaled (if any) -- confirm that later
# cells do not expect a different df.
# NOTE(review): every title names YEARS[1] as the year of the axes;
# presumably pca was fit on that year's data -- confirm.
for df, year in zip(scaled, YEARS[1:]):
    biplot(pca,
           df,
           title="{} Survey on {} Axes".format(year, YEARS[1]),
           color=df.PartyID)

# In[10]:

# Both pipelines are built around the very same scl object rather than
# separate copies. Assuming Pipeline is scikit-learn's (which keeps the step
# objects it is given), fitting one pipeline also fits the scaler seen by the
# other -- TODO confirm this sharing is intended.
# The imputation step ('imp', imp) is disabled in both pipelines.

# Scaling only.
scaler_pipeline = Pipeline([('scl', scl)])

# Scaling followed by PCA.
pipeline = Pipeline([('scl', scl), ('pca', pca)])

# Starts out empty; presumably collects explained-variance ratios in the
# code that follows -- confirm against the rest of the cell.
evrs = []