""" Advanced Indexing and Slicing using range() """
import config_file  # noqa: F401
from utilities.dataframe_utilities import DataframeUtilities as dfu

# summer_df is indexed by whatever dfu.get_dataframe() provides, while summer
# is indexed by the "Athlete" column.
summer_df = dfu.get_dataframe("summer.csv")
# print(summer_df)
summer = dfu.get_indexed_dataframe("summer.csv", "Athlete")
# print(summer)

# Case 1: Getting the first 5 rows and the rows at positions 354 and 765.
# iloc is purely positional, so index labels play no role here.
rows = list(range(5)) + [354, 765]
print(rows)
print(summer_df.iloc[rows])

# Case 2: Getting the first three columns and the columns "Gender" and "Event".
# The positional part is translated into column labels so that a single
# label-based loc call can select everything.
col = summer_df.columns[:3].to_list() + ["Gender", "Event"]
print(col)
print(summer_df.loc[:, col])

# Case 3: Combining position- and label-based indexing: rows at positions 200
# and 300 and columns "Athlete" and "Medal".
# loc only understands labels, so the positions are first converted into the
# labels stored at those positions. Passing [200, 300] straight to loc would
# select by label and only matches the positions under a default RangeIndex.
row_labels = summer_df.index[[200, 300]]
print(summer_df.loc[row_labels, ["Athlete", "Medal"]])

# Case 4: Combining position- and label-based indexing: rows labelled
# "PHELPS, Michael" and the columns at positions 4 and 6.
col = summer.columns[[4, 6]]
print(col)
print(summer.loc["PHELPS, Michael", col])
# The removed .ix indexer used to allow this mix directly:
# print(summer.ix["PHELPS, Michael", [4, 6]])
''' Data Visualization with Matplotlib
The plot() method '''
import config_file  # noqa: F401
from utilities.dataframe_utilities import DataframeUtilities as dfu
import matplotlib.pyplot as plt

titanic = dfu.get_dataframe("titanic.csv")
print(titanic.head())
titanic.info()
# Plot all numerical columns of the titanic dataframe.
titanic.plot()  # by default line plots
titanic.plot(subplots=True, figsize=(15, 12))  # larger figure size
# separate x-axis scale per subplot, seems more meaningful here
titanic.plot(subplots=True, figsize=(15, 12), sharex=False)
# separate x-axis but shared y-axis scale, seems less readable in this case
titanic.plot(subplots=True, figsize=(12, 8), sharex=False, sharey=True)
plt.show()

# Plot a single column (a Series) on its own.
titanic.age.plot(figsize=(12, 8))
plt.show()

#  Customization of plots
titanic.age.plot(figsize=(12, 8), fontsize=13, c="r", linestyle="-")
# c for color, "r or red" for red color, linestyle can be --, : , -
plt.show()

print(plt.style.available)  # check the available styles
plt.style.use("classic")  # select classic style

# Tick positions from 0 to 900 inclusive, in steps of 50.
xticks = list(range(0, 901, 50))
# Example #3
# 0
''' Applying User-defined functions to dataframes using
apply(), map() and applymap() '''
import config_file  # noqa: F401
from utilities.dataframe_utilities import DataframeUtilities as dfu


summer = dfu.get_dataframe("summer.csv")
sales = dfu.get_indexed_dataframe("sales.csv", 0)
print(sales)
sales.info()

# Smallest value of every column (the minimum is taken down the rows).
print(sales.min(axis="index"))

# Smallest value of every row (the minimum is taken across the columns).
print(sales.min(axis="columns"))

''' If we need to perform a specific operation on a dataframe but there is no
predefined function for it (e.g. we want the range between the min and max
sales, either for a day or for an individual), we must write our own function
and use it with the apply() method. '''

# to check range for each weekday/ individual


def range(series):
    """Return the spread of ``series``: its maximum minus its minimum.

    ``series`` only needs to provide ``max()`` and ``min()`` methods.

    NOTE: from this definition onwards the name shadows the builtin
    ``range`` inside this module.
    """
    highest = series.max()
    lowest = series.min()
    return highest - lowest


# Using the apply() method on the rows/columns of a dataframe.
# Heading printed ahead of the per-weekday range output.
print("\n To check range on each weekday:")
''' String Operations in Pandas '''
import config_file  # noqa: F401
from utilities.dataframe_utilities import DataframeUtilities as dfu


summer = dfu.get_dataframe("summer.csv")
hello = "Hello World"
print(hello)

# Plain Python string methods operate on a single str object.
print(type(hello))
print(len(hello))
for result in (
    hello.lower(),
    hello.upper(),
    hello.title(),
    hello.split(" "),
    hello.replace("Hello", "Hi"),
):
    print(result)

print("\n string operations on dataframes:")
# Label slice up to and including 9, copied so it is independent of summer.
names = summer.loc[:9, "Athlete"].copy()
print(names)
print(names.dtypes)  # object datatype
first_name = names[0]
print(first_name)  # 1st element
print(type(first_name))  # str type

print("\n vectorized string operations using .str :")
# print(names.lower())  # gives error
# A Series has no string methods of its own; they are reached through the
# .str accessor, which applies them element-wise.
lowercase_names = names.str.lower()
print(lowercase_names)
titlecase_names = names.str.title()
print(titlecase_names)

# n parameter for number of splits, by default for all occurrences
''' Barcharts and Piecharts
These plots are specifically used to visualize data that is already aggregated
or for discrete variables. To plot continuous variables, histograms are used
instead. '''
import config_file  # noqa: F401
from utilities.dataframe_utilities import DataframeUtilities as dfu
import matplotlib.pyplot as plt

summer_2012 = dfu.get_indexed_dataframe("summer_2012.csv", "Country")
print(summer_2012)
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# deprecated the old names, so prefer the new name whenever it is available
# and fall back to the old one on older Matplotlib releases.
if "seaborn-v0_8" in plt.style.available:
    plt.style.use("seaborn-v0_8")
else:
    plt.style.use("seaborn")

# vertical plot
summer_2012.Medal.plot(kind="bar", figsize=(12, 8), fontsize=12)
plt.show()  # bars get labelled as per the index label

# horizontal plot
summer_2012.Medal.plot(kind="barh", figsize=(12, 8), fontsize=12)
plt.show()  # bars get labelled as per the index label

# pie chart of the same column
summer_2012.Medal.plot(kind="pie", figsize=(12, 8), fontsize=12)
plt.show()