# 6. How to get the items of series A not present in series B?
import numpy as np
import pandas as pd

import helper_funcs as hf

# Input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Solution
# np.setdiff1d yields the values of ser1 that never occur in ser2 as a plain
# array, so the result is wrapped in a new Series (the original index labels
# of ser1 are not carried over).
only_in_ser1 = np.setdiff1d(ser1, ser2)
ser_diff = pd.Series(only_in_ser1)

# The website's solution is instead:
#     ser1[~ser1.isin(ser2)]
# Here "[]" is syntactic sugar for __getitem__, which accepts a boolean array
# of the same length as the series and returns only the elements flagged True.

# Hand the result to the project helper under the exercise tag "006".
hf.write_results_str("006", [ser_diff])
# 3. How to convert the index of a series into a column of a dataframe?
import numpy as np
import pandas as pd

import helper_funcs as hf

# Input: a series whose index holds the letters and whose values are 0..25.
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(data=mydict)

# My Solution
# reset_index() on a Series returns a DataFrame in which the former index
# becomes an ordinary column next to the values.
df = ser.reset_index()

# Hand the result to the project helper under the exercise tag "003".
results = [df]
hf.write_results_str("003", results)
# 2. How to create a series from a list, numpy array and dict?
import numpy as np
import pandas as pd

import helper_funcs as hf

# Input: the same data in three container types.
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

# My Solution
# pd.Series accepts each container directly; for the dict, the keys become
# the index labels and the values become the data.
series_from_list = pd.Series(mylist)
series_from_array = pd.Series(myarr)
series_from_dict = pd.Series(mydict)

# Hand all three results to the project helper under the exercise tag "002".
results = [series_from_list, series_from_array, series_from_dict]
hf.write_results_str("002", results)
# 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?
import numpy as np
import pandas as pd

import helper_funcs as hf

# Input: 25 draws from a normal distribution (mean 10, standard deviation 5).
# The draw is unseeded, so the values differ from run to run.
samples = np.random.normal(loc=10, scale=5, size=25)
ser = pd.Series(samples)

# Solution
# describe() reports min, 25%, 50%, 75% and max (alongside count, mean, std).
summary_series = ser.describe()

# The website suggests:
#     print(np.percentile(ser, q=[0, 25, 50, 75, 100]))
# Also possible is:
#     print(ser.quantile([0, 0.25, 0.5, 0.75, 1.0]))

print(summary_series)

# Hand the input and its summary to the project helper under the tag "008".
hf.write_results_str("008", [ser, summary_series])
# 9. How to get frequency counts of unique items of a series?
import numpy as np
import pandas as pd

import helper_funcs as hf

# Input: 30 letters picked at random (unseeded) from 'a'..'h'.
letters = list('abcdefgh')
picks = np.random.randint(8, size=30)
ser = pd.Series(np.take(letters, picks))

# Solution
# value_counts() tallies how often each distinct value occurs.
counts = ser.value_counts()

# Hand the input and the tallies to the project helper under the tag "009".
hf.write_results_str("009", [ser, counts])
# 5. How to assign name to the series’ index?
import pandas as pd

import helper_funcs as hf

# Input
letters = list('abcedfghijklmnopqrstuvwxyz')
ser = pd.Series(letters)

# Solution
# NOTE: despite the wording of the heading, this sets the name of the Series
# itself (Series.name); the index name is left untouched.
ser.name = "alphabets"

# The following line also works:
#     ser.rename("alphabets", inplace=True)

# Hand the named series to the project helper under the exercise tag "005".
hf.write_results_str("005", [ser])
# 7. How to get the items not common to both series A and series B?
import pandas as pd

import helper_funcs as hf

# Input
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Solution
# Take each one-sided difference with a boolean mask, then stack the two
# pieces end to end.
only_in_ser1 = ser1[~ser1.isin(ser2)]
only_in_ser2 = ser2[~ser2.isin(ser1)]
combined_series = pd.concat([only_in_ser1, only_in_ser2], axis=0)

# The website suggests building the final series from NumPy 1-D set
# operations instead. Per the original note, the difference is that the
# website's result carries the indices of the union, while this version keeps
# the index labels each value had in its originating series.

# Hand the result to the project helper under the exercise tag "007".
hf.write_results_str("007", [combined_series])
# 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?
import numpy as np
import pandas as pd

import helper_funcs as hf

# Input
# A bare `np.random.RandomState(100)` only constructs a generator object and
# throws it away; it does not seed the module-level np.random functions.
# Keep the seeded generator and draw from it so the input is reproducible.
rng = np.random.RandomState(100)
ser = pd.Series(rng.randint(1, 5, [12]))

# Solution
# value_counts() orders values from most to least frequent, so the first two
# index entries are the two most common values (or fewer, if the series has
# fewer than two distinct values).
top2 = ser.value_counts().index[:2].tolist()

# Keep entries that are among the top values and replace the rest.
# isin() avoids indexing top2[0] / top2[1] directly, which would raise
# IndexError when fewer than two distinct values exist.
mod_ser = ser.where(ser.isin(top2), "Other")

# The website offers:
#     print("Top 2 Freq:", ser.value_counts())
#     ser[~ser.isin(ser.value_counts().index[:2])] = 'Other'

# Hand the input, the top values and the result to the project helper under
# the exercise tag "010".
hf.write_results_str("010", [ser, top2, mod_ser])