class Data:
    """Consumption records for one utility, merged with coordinates and pre-aggregated.

    Attributes:
        power: ``"Electricity"`` or ``"Water"``; selects the source dataset and
            the consumption column (``consumption_kwh`` vs ``consumption_hcf``).
        data: The consumption DataFrame after merging coordinates, adding the
            ``year`` column and casting dtypes.
        coords: The coordinates DataFrame that is merged into ``data`` on ``tds``.
        tds_by_year: Mean consumption indexed by ``(tds, year)``.
        tds_sum_year: Summed consumption indexed by ``(tds, year)``.
    """

    def __init__(self, *args, **kwargs):
        """Load, clean and pre-aggregate the data.

        Args:
            *args: If the string ``"Electricity"`` is among the positional
                arguments the electricity dataset is loaded; otherwise the
                water dataset is used.
            **kwargs: Forwarded unchanged to the selected loader's
                ``get_data``.
        """
        # NOTE(review): Electricity, Water and Coords are defined outside this
        # block; they are presumed to return pandas DataFrames sharing a "tds"
        # column -- confirm against their definitions.
        if "Electricity" in args:
            self.power = "Electricity"
            self.data = Electricity().get_data(**kwargs)
        else:
            self.power = "Water"
            self.data = Water().get_data(**kwargs)
        self.coords = Coords().get_all_data()

        # Clean up the data. Order matters: set_dtypes casts the "year" column
        # created by add_year_column and the coordinate columns brought in by
        # merge_coords.
        self.merge_coords()
        self.add_year_column()
        self.set_dtypes()

        # Mean consumption by (tds x year).
        self.tds_by_year = None
        self.get_by_year_data()

        # add other pre-computed dataframes here
        # ...

        # Total consumption by (tds x year).
        self.tds_sum_year = None
        self.get_aggregate_data()

    @property
    def _consumption_column(self):
        """Name of the consumption column matching ``self.power``."""
        if self.power == "Electricity":
            return "consumption_kwh"
        return "consumption_hcf"

    def _consumption_by_tds_year(self):
        """Return the consumption column grouped by ``(tds, year)``."""
        return self.data.groupby(["tds", "year"])[self._consumption_column]

    def merge_coords(self):
        """Merge the coordinate columns into ``self.data`` on ``tds``.

        Uses ``DataFrame.merge`` with its default join type, so only rows
        whose ``tds`` value appears in both frames are kept.
        """
        self.data = self.data.merge(self.coords, on="tds")

    def set_dtypes(self):
        """Cast the coordinate, year and consumption columns to numeric dtypes.

        ``latitude``, ``longitude`` and the consumption column become float;
        ``year`` becomes int. Requires ``merge_coords`` and
        ``add_year_column`` to have run so those columns exist.
        """
        column_dtypes = (
            ("latitude", float),
            ("longitude", float),
            ("year", int),
            (self._consumption_column, float),
        )
        for column, dtype in column_dtypes:
            self.data[column] = self.data[column].astype(dtype)

    def add_year_column(self):
        """Add a ``year`` column derived from ``revenue_month``.

        The text before the first ``"-"`` of each ``revenue_month`` value is
        parsed as an integer year.
        """
        year_text = self.data["revenue_month"].str.split("-").str[0]
        self.data["year"] = year_text.astype(int)

    def get_by_year_data(self):
        """Store the mean consumption per ``(tds, year)`` in ``self.tds_by_year``."""
        self.tds_by_year = self._consumption_by_tds_year().mean()

    def get_aggregate_data(self):
        """Store the total consumption per ``(tds, year)`` in ``self.tds_sum_year``."""
        self.tds_sum_year = self._consumption_by_tds_year().sum()