Example #1
0
# Reshape tipos3 into a one-column frame so its rows can be filtered by value.
# NOTE(review): tipos3 is presumably a Series of column dtypes - confirm upstream.
tipos3 = tipos3.to_frame()

# Row labels whose first-column value is anything other than 'category'.
non_category_rows = tipos3.index[tipos3.iloc[:, 0] != 'category']
tipos3 = tipos3.drop(non_category_rows)

# Transposing turns the surviving row labels into column labels.
tipos3t = tipos3.T

# Keep only those labels as columns of df_v2.
df_str = df_v2[list(tipos3t.columns)]

# In[]

# Standardize the numeric data ahead of PCA.
# The scaler output is wrapped back into a DataFrame whose column labels are
# taken from tipos2t (iterating a DataFrame yields its column labels).
# NOTE(review): assumes tipos2t is a DataFrame whose columns match df_num - confirm.
df_num_norm = pd.DataFrame(
    StandardScaler().fit_transform(df_num),
    columns=list(tipos2t.head()),
)

# In[]

# Covariance matrix, correlations, linear-dependence heatmap and condition number.
cov_df = df_num_norm.cov()
corr_df = df_num_norm.corr()

# Plain array view of the covariance matrix, reused by the numpy routines below.
cov_values = cov_df.to_numpy()

# Sum of the diagonal of the covariance matrix.
var_global = sum(cov_values.diagonal())
det = np.linalg.det(cov_values)

# Heatmap of the correlation matrix, colour scale centred on zero.
sns.heatmap(corr_df, center=0, cmap='Blues_r')

cond_cov = np.linalg.cond(cov_values)

# In[]

# Outlier identification and removal of the 10%.
# Setup only: the loop that consumes these values follows this block.
# (A second accumulator, `a`, is disabled in the original: a = [])
a_rob = list()

# Per-column mean and median of the standardized data, as standalone arrays.
media_num_norm = df_num_norm.mean().to_numpy(copy=True)
mediana_num_norm = df_num_norm.median().to_numpy(copy=True)

# Inverse of the covariance matrix computed from the standardized data.
inv_cov = np.linalg.inv(cov_df.to_numpy(copy=True))
for i in range(len(df_num_norm.index)):
Example #2
0
#     variable = x[vf].as_matrix().reshape(len(x[vf]),1)
#     co_variance = np.cov(variable, y)
#     co_variance_list.append(co_variance[0,1])

# print(co_variance_list)

#for vf in features:
#   variable = x.loc[:,vf]
#  print (variable)

# variable = x.loc[:,'volume'].as_matrix().reshape(len(x['volume']),1)
# y = y.as_matrix().reshape(len(y),1)
# print (type(variable))
# print(type(y))
# print(variable.shape, y.shape)
# co_variance = np.cov(variable, y)
# print (co_variance)

# finalDf = pd.concat([x, y], axis = 1)
# Take the x_y columns of df and substitute 0 for every NaN.
selected_columns = df.loc[:, x_y].replace(np.nan, 0)

# Standardize the raw values, then restore the x_y column labels.
scaled_values = StandardScaler().fit_transform(selected_columns.values)
cov_value = pd.DataFrame(scaled_values, columns=x_y)
print(cov_value)

# Pairwise covariance between the standardized columns.
co_variance_matrix = cov_value.cov()

# In[110]:

# Show the full covariance matrix, then the 'price' column on its own,
# ordered from the largest covariance to the smallest.
print(co_variance_matrix)
price_covariances = co_variance_matrix.loc[:, ['price']]
print(price_covariances.sort_values(by='price', ascending=False))