Beispiel #1
0
def select_series(input_df, kunag=500057582, matnr=103029):
    """Select the rows of one (kunag, matnr) series.

    Rows of ``input_df`` whose ``"kunag"`` and ``"matnr"`` columns both equal
    the requested values are kept, and the result is passed through
    ``remove_negative`` before being returned.

    :param input_df: frame with ``"kunag"`` and ``"matnr"`` columns
    :param kunag: value to match in the ``"kunag"`` column
    :param matnr: value to match in the ``"matnr"`` column
    :return: whatever ``remove_negative`` returns for the matching rows
    """
    matches_kunag = input_df["kunag"] == kunag
    matches_matnr = input_df["matnr"] == matnr
    return remove_negative(input_df[matches_kunag & matches_matnr])
Beispiel #2
0
def frequency(input_df):
    """
    returns number of datapoints in last year

    Negative entries are first dropped with ``transformation.remove_negative``.
    The window starts at midnight one calendar year before the last index
    entry and runs to the end of the frame.

    :param input_df: frame indexed by date; the last index entry is taken as
        the latest date (presumably the index is sorted ascending -- verify
        against caller)
    :return: number of rows in the window, or 0 when no rows remain after
        filtering
    """

    input_df = transformation.remove_negative(input_df)
    if input_df.shape[0] == 0:
        # Nothing survived the filter, so there is no latest date to anchor on.
        return 0

    latest_date = input_df.index[-1]
    window_day = latest_date.day
    if latest_date.month == 2 and window_day == 29:
        # The year before a leap year is never a leap year, so 29 February
        # has no counterpart there; start the window on 28 February instead
        # of letting datetime() raise ValueError.
        window_day = 28

    last_year_date = datetime(latest_date.year - 1, latest_date.month, window_day)
    return input_df[last_year_date:].shape[0]
Beispiel #3
0
def normalized_frequency(input_df):
    """ returns number of datapoints in last year with normalization
        input: a ts with a particular kunag and matnr; must have a "date"
               column in %Y%m%d format
        output: an integer
                 0  -> at most 5 rows remain after remove_negative
                -1  -> the series spans at most 365 days
                -2  -> the series spans at most 547 days
                otherwise the number of rows dated within the year ending
                at the latest date (both ends inclusive)
    """
    input_df = remove_negative(input_df)
    # assign() builds a new frame, so the parsed column is never written into
    # the object returned by remove_negative (which may be the caller's frame
    # or a view of it).
    input_df = input_df.assign(
        parse_date=pd.to_datetime(input_df["date"], format="%Y%m%d")
    )
    input_df = input_df.sort_values("parse_date")
    if input_df.shape[0] <= 5:
        return 0

    first_date = input_df["parse_date"].iloc[0]
    latest_date = input_df["parse_date"].iloc[-1]
    len_in_days = (latest_date - first_date).days
    if len_in_days <= 365:
        return -1
    if len_in_days <= 547:
        return -2

    window_day = latest_date.day
    if latest_date.month == 2 and window_day == 29:
        # The year before a leap year is never a leap year, so 29 February
        # has no counterpart there; start the window on 28 February instead
        # of letting datetime() raise ValueError.
        window_day = 28
    last_year_date = datetime(latest_date.year - 1, latest_date.month, window_day)

    in_window = (input_df["parse_date"] >= last_year_date) & (
        input_df["parse_date"] <= latest_date
    )
    return input_df[in_window].shape[0]
Beispiel #4
0
if __name__ == "__main__":
    # Ad-hoc script: load the raw sample, plot the weekly aggregate of one
    # series, then run the per-series frequency computation and read back
    # the frequency CSV.

    import matplotlib.pyplot as plt
    import transformation
    import data_transformation

    file_address = "/home/aman/Desktop/CSO_drug/data/raw_data_drug_store_sample_30_2018-12-10.tsv"

    # Read the date column as text and parse it explicitly: `pd.datetime` no
    # longer exists in current pandas and read_csv's `date_parser` argument is
    # deprecated, while pd.to_datetime(format=...) works across versions.
    data = pd.read_csv(file_address, sep="\t", dtype={"date": str})
    data["date"] = pd.to_datetime(data["date"], format='%Y%m%d')
    data = data.set_index("date").sort_index()
    data = transformation.remove_negative(data)
    ts = transformation.select_series(data, kunag=600142082, matnr=145105)
    ts = ts.reset_index()
    ts = data_transformation.get_weekly_aggregate(ts)
    # Bracket assignment creates a real column; attribute assignment
    # (`ts.Timestamp = ...`) does not create one when it is missing.
    ts["Timestamp"] = pd.to_datetime(ts.dt_week, format='%Y-%m-%d')
    ts.index = ts["Timestamp"]
    plt.figure(figsize=(12, 8))
    plt.plot(ts["quantity"])
    # Save before show(): once the window opened by show() is closed, the
    # figure is gone and savefig() would write an empty canvas.
    plt.savefig("abc.png")
    plt.show()
    norm_freq_last_year_all_series(data)
    combination_with_freq = pd.read_csv(
        "/home/aman/Desktop/CSO_drug/file_generated/freq_last_year_all_series.csv"
    )