# Exploratory script: load the dataset, separate target from features,
# standardize the features and draw violin plots of the standardized values.

data = pd.read_csv("data.csv")
data.head()  # head() shows only the first 5 rows

col = data.columns
print(col)

y = data.diagnosis  # target labels: M or B

# Columns removed from the feature matrix. Named `drop_cols` (not `list`)
# so the builtin `list` is not shadowed for the rest of the script.
drop_cols = ["Unnamed: 32", "id", "diagnosis"]
x = data.drop(drop_cols, axis=1)
x.head()

# Pass the series by keyword: seaborn deprecated positional x/y arguments in
# 0.11 and from 0.12 on the only positional argument is `data`.
ax = sns.countplot(x=y, label="Count")  # M = 212, B = 357

x.describe()

data_dia = y
data = x
data_n_2 = (data - data.mean()) / (data.std())  # standardization

# Columns 0-9 of the standardized features: reshape to long form
# (one row per sample/feature pair) so seaborn can group by feature.
data = pd.concat([y, data_n_2.iloc[:, 0:10]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
plt.figure(figsize=(10, 10))
sns.violinplot(
    x="features",
    y="value",
    hue="diagnosis",
    data=data,
    split=True,
    inner="quart",
)
plt.xticks(rotation=90)

# Columns 10-19 of the standardized features, reshaped the same way.
data = pd.concat([y, data_n_2.iloc[:, 10:20]], axis=1)
data = pd.melt(data, id_vars="diagnosis", var_name="features", value_name="value")
# NOTE(review): a new figure is opened here but no plotting call follows in
# this section - confirm the drawing code exists further on.
plt.figure(figsize=(10, 10))
def test_melt():
    """Check that melting a frame built from the first test dataset warns.

    Both the ``DataFrame`` construction and the ``pd.melt`` call run inside
    the ``pytest.warns`` context, so a ``UserWarning`` raised by either one
    satisfies the check.
    """
    sample = test_data_values[0]
    with pytest.warns(UserWarning):
        frame = pd.DataFrame(sample)
        pd.melt(frame)
# NOTE(review): this first lookup uses `data` as it exists before the CSV is
# re-read below and expects "speed_level" and "Speed" columns - confirm they
# are created by earlier code.
data.loc[:10, ["speed_level", "Speed"]]  # loc is covered in more detail later

# --- Load and inspect the dataset ---
data = pd.read_csv("pokemon.csv")
data.head()  # first 5 rows
data.tail()  # last 5 rows
data.columns
data.shape
data.info()

# dropna=False makes NaN values show up in the counts as well.
type_1_counts = data["Type 1"].value_counts(dropna=False)
print(type_1_counts)

data.describe()  # summary statistics; null entries are ignored
data.boxplot(column="Attack", by="Legendary")

# --- Reshape: wide -> long -> wide ---
data_new = data.head(5)  # work on only 5 rows
data_new
melted = data_new.melt(id_vars="Name", value_vars=["Attack", "Defense"])
melted
melted.pivot(index="Name", columns="variable", values="value")

# --- Concatenate along rows ---
data1 = data.head(5)
data2 = data.tail(5)
# axis=0 stacks the frames on top of each other; ignore_index renumbers rows.
conc_data_row = pd.concat([data1, data2], axis=0, ignore_index=True)
conc_data_row

# --- Concatenate along columns ---
data1 = data["Attack"].head(5)
data2 = data["Defense"].head(5)
# axis=1 places the two series side by side as columns.
conc_data_col = pd.concat([data1, data2], axis=1)
conc_data_col

data.dtypes