def select_series(input_df, kunag=500057582, matnr=103029):
    """
    Select the series corresponding to the given kunag and matnr.

    :param input_df: frame with "kunag" and "matnr" columns
    :param kunag: value matched against the "kunag" column
    :param matnr: value matched against the "matnr" column
    :return: the matching rows after being passed through ``remove_negative``
    """
    is_selected = (input_df["kunag"] == kunag) & (input_df["matnr"] == matnr)
    return remove_negative(input_df[is_selected])
def frequency(input_df):
    """
    Return the number of datapoints in the last year of the series.

    The window starts at midnight one calendar year before the last index
    entry and runs to the end of the frame.

    :param input_df: frame indexed by date. The last index entry is taken as
        the latest date and the frame is label-sliced from the window start,
        so the index is presumably a sorted DatetimeIndex (confirm against
        callers).
    :return: number of rows in the window; 0 when no rows are left after
        ``remove_negative``
    """
    input_df = remove_negative(input_df)
    if input_df.shape[0] == 0:
        # Nothing left to count; index[-1] would raise IndexError.
        return 0

    latest_date = input_df.index[-1]
    window_day = latest_date.day
    if latest_date.month == 2 and window_day == 29:
        # 29 Feb has no counterpart in the preceding (non-leap) year;
        # datetime() would raise ValueError, so fall back to 28 Feb.
        window_day = 28
    last_year_date = datetime(latest_date.year - 1, latest_date.month, window_day)

    return input_df[last_year_date:].shape[0]
def normalized_frequency(input_df):
    """
    Return the number of datapoints in the last year, with sentinel codes for
    series that are too short to be counted.

    :param input_df: a ts with a particular kunag and matnr; must have a
        "date" column parseable with the ``%Y%m%d`` format
    :return: an integer:
        0 when at most 5 rows remain after ``remove_negative``;
        -1 when the first and last dates are at most 365 days apart;
        -2 when they are at most 547 days apart;
        otherwise the number of rows dated between one calendar year before
        the latest date (at midnight) and the latest date, both inclusive
    """
    input_df = remove_negative(input_df)

    # Parse into a standalone Series instead of writing a "parse_date" column
    # onto the frame, so the frame handed back by remove_negative (possibly
    # the caller's own object or a slice of it) is never modified.
    parsed_dates = pd.to_datetime(input_df["date"], format="%Y%m%d").sort_values()

    if parsed_dates.shape[0] <= 5:
        return 0

    first_date = parsed_dates.iloc[0]
    latest_date = parsed_dates.iloc[-1]
    len_in_days = (latest_date - first_date).days
    if len_in_days <= 365:
        return -1
    if len_in_days <= 547:
        return -2

    window_day = latest_date.day
    if latest_date.month == 2 and window_day == 29:
        # 29 Feb has no counterpart in the preceding (non-leap) year;
        # datetime() would raise ValueError, so fall back to 28 Feb.
        window_day = 28
    last_year_date = datetime(latest_date.year - 1, latest_date.month, window_day)

    in_window = (parsed_dates >= last_year_date) & (parsed_dates <= latest_date)
    return int(in_window.sum())
if __name__ == "__main__":
    # Ad-hoc driver: plot the weekly aggregate of one series, then compute the
    # normalized last-year frequency for every series in the sample file.
    import matplotlib.pyplot as plt
    import transformation
    import data_transformation

    file_address = "/home/aman/Desktop/CSO_drug/data/raw_data_drug_store_sample_30_2018-12-10.tsv"

    # Parse the date column explicitly after reading: pd.datetime no longer
    # exists in pandas 2.x and read_csv's date_parser argument is deprecated.
    data = pd.read_csv(file_address, sep="\t", dtype={"date": str})
    data["date"] = pd.to_datetime(data["date"], format="%Y%m%d")
    data = data.set_index("date").sort_index()
    data = transformation.remove_negative(data)

    ts = transformation.select_series(data, kunag=600142082, matnr=145105)
    ts = ts.reset_index()
    ts = data_transformation.get_weekly_aggregate(ts)
    # Set the index directly; assigning to ts.Timestamp would only set a
    # Python attribute on the frame, not create a column.
    ts.index = pd.to_datetime(ts["dt_week"], format="%Y-%m-%d")

    plt.figure(figsize=(12, 8))
    plt.plot(ts["quantity"])
    # Save before show(): once the window shown by show() is closed, the
    # figure is gone and savefig would write an empty image.
    plt.savefig("abc.png")
    plt.show()

    norm_freq_last_year_all_series(data)
    combination_with_freq = pd.read_csv(
        "/home/aman/Desktop/CSO_drug/file_generated/freq_last_year_all_series.csv"
    )