Example #1
0
# Load the raw dataset and take a first look at it.
data = pd.read_csv("data.csv")
data.head()  # head() shows only the first 5 rows
col = data.columns
print(col)

# Split the target column from the feature columns.
y = data.diagnosis  # M or B
# Named `drop_cols` instead of `list` so the builtin `list` is not shadowed
# for the rest of the script.
drop_cols = ["Unnamed: 32", "id", "diagnosis"]
x = data.drop(drop_cols, axis=1)
x.head()

# Pass the series by keyword: seaborn >= 0.12 accepts only `data` as a
# positional argument, so a bare positional series is no longer read as `x`.
ax = sns.countplot(x=y, label="Count")  # M = 212, B = 357
x.describe()

# Standardize every feature column (subtract the mean, divide by the std)
# so that features on different scales can share one plot axis.
data_dia = y
data = x
data_n_2 = (data - data.mean()) / (data.std())  # standardization

# First 10 standardized features: reshape to long format
# (one row per diagnosis/feature/value) and draw split violins per diagnosis.
data = pd.concat([y, data_n_2.iloc[:, 0:10]], axis=1)
data = pd.melt(
    data,
    id_vars="diagnosis",
    var_name="features",
    value_name="value",
)
plt.figure(figsize=(10, 10))
sns.violinplot(
    x="features",
    y="value",
    hue="diagnosis",
    data=data,
    split=True,
    inner="quart",
)
plt.xticks(rotation=90)  # feature names are long; rotate so they do not overlap

# Next 10 standardized features, reshaped the same way for the next figure.
data = pd.concat([y, data_n_2.iloc[:, 10:20]], axis=1)
data = pd.melt(
    data,
    id_vars="diagnosis",
    var_name="features",
    value_name="value",
)
plt.figure(figsize=(10, 10))
Example #2
0
def test_melt():
    """Building a DataFrame from the first test dataset and melting it must emit a UserWarning."""
    sample = test_data_values[0]
    with pytest.warns(UserWarning):
        # Both the construction and the melt stay inside the context so the
        # same calls are observed for warnings.
        frame = pd.DataFrame(sample)
        pd.melt(frame)
Example #3
0
# NOTE(review): `data` is used here before the read_csv below, so it must
# already be defined (with a "speed_level" column) by earlier code that is
# not shown in this snippet -- confirm.
data.loc[:10, ["speed_level",
               "Speed"]]  # label-based selection with loc; covered in more detail later

# Load the pokemon dataset and inspect its basic structure.
data = pd.read_csv("pokemon.csv")
data.head()  # head() shows the first 5 rows
data.tail()  # tail() shows the last 5 rows
data.columns
data.shape
data.info()

# Frequency of each value in the "Type 1" column.
print(data["Type 1"].value_counts(
    dropna=False))  # dropna=False: NaN values are counted as well
data.describe()  # summary statistics; null entries are ignored
# Box plot of the "Attack" column, grouped by the "Legendary" column.
data.boxplot(column="Attack", by="Legendary")

# Melt: reshape a small sample so that "Attack" and "Defense" become rows,
# keyed by "Name".
data_new = data.head()  # keep only the first 5 rows as a small sample
data_new
melted = pd.melt(frame=data_new,
                 id_vars="Name",
                 value_vars=["Attack", "Defense"])
melted
# Pivot: the reverse of the melt above -- one column per original variable.
melted.pivot(index="Name", columns="variable", values="value")

# Concatenate the first and last rows on top of each other.
data1 = data.head()
data2 = data.tail()
conc_data_row = pd.concat(
    [data1, data2], axis=0,
    ignore_index=True)  # axis = 0 : stacks the dataframes row-wise; ignore_index renumbers the rows
conc_data_row

# Concatenate two columns side by side.
data1 = data["Attack"].head()
data2 = data["Defense"].head()
conc_data_col = pd.concat([data1, data2],
                          axis=1)  # axis = 1 : places the series side by side as columns
conc_data_col
data.dtypes