Esempio n. 1
0
def population_statistics(
    fea_des,
    dt,
    treat,
    tar,
    threshold,
    is_above,
    stat_funcs,
):
    """
    Print statistics of the `tar` values selected by a threshold on `treat`.

    Inputs -
        fea_des - Str():
            Description of the records held in dt.
            * Not used by the current implementation.
        dt - Dict():
            All the data.
            dt.keys() - Str()
            dt.values() - List() of comparable values
        treat - Str():
            Feature whose values are compared against the threshold.
            * Must be in dt.keys()
        tar - Str():
            Feature whose values are collected and summarised.
            * Must be in dt.keys()
        threshold - Order():
            Value compared against every dt[treat][i].
            * Must be comparable (<=) with dt[treat][i]
        is_above - Bool():
            If true:
                keep dt[tar][i] when dt[treat][i] <= threshold is false
            else:
                keep dt[tar][i] when dt[treat][i] <= threshold is true
        stat_funcs - List() of functions:
            Forwarded unchanged to dpy.print_details.

    Outputs -
        None

    Main functionality -
        Pair dt[tar] with dt[treat] position by position, keep the target
        values whose treatment value falls on the requested side of the
        threshold, and hand them to dpy.print_details under the key `tar`.
    """
    selected_targets = []
    for target_value, treat_value in zip(dt[tar], dt[treat]):
        at_or_below = treat_value <= threshold
        # XOR: keep the row when the "at or below" test disagrees with
        # is_above (above-mode keeps the rows that fail the <= test).
        if is_above ^ at_or_below:
            selected_targets.append(target_value)

    dpy.print_details({tar: selected_targets}, [tar], stat_funcs)
Esempio n. 2
0
def population_statistics(feature_description, data, treatment, target, threshold, is_above, statistic_functions):
    """ Prints statistics of the records that fall on one side of a threshold.

        Builds a new dictionary with the same keys as data, containing only the
        records whose treatment value is above the threshold (is_above truthy)
        or below/equal to it (is_above falsy). Then prints the description and
        delegates the printing of the statistics to print_details.

        :parameters:
        feature_description -- text printed before the statistics
        data -- dictionary mapping each feature name to its list of values
        treatment -- the feature whose values are compared to threshold
        target -- forwarded to print_details as the feature(s) to summarise
        threshold -- the value the treatment values are compared to
        is_above -- truthy keeps values > threshold, falsy keeps values <= threshold
        statistic_functions -- forwarded unchanged to print_details
        :returns: None
    """
    selected = {feature: [] for feature in data}
    for row, treatment_value in enumerate(data[treatment]):
        if is_above:
            keep = treatment_value > threshold
        else:
            keep = treatment_value <= threshold
        if keep:
            # copy the whole record (every feature of this row)
            for feature in data:
                selected[feature].append(data[feature][row])

    print(feature_description)
    print_details(selected, target, statistic_functions)
Esempio n. 3
0
def main(argv):
    """Load the requested features and print statistics for season 1.

    :param argv: argument list; argv[1] is handed to data.load_data as the
        source, argv[2] is a ", "-separated string of feature names.
    :return: None
    """
    features = argv[2].split(sep=", ")
    the_data = data.load_data(argv[1], features)
    statistic_functions = [statistics.sum, statistics.mean, statistics.median]
    # Filter the loaded records, not the `data` module itself (the original
    # passed the module as the first argument and never used `the_data`).
    # NOTE(review): the filter value is passed as a bare 1 and the result is
    # used as a single dictionary - confirm both against filter_by_feature.
    summer_data = data.filter_by_feature(the_data, "season", 1)
    data.print_details(summer_data, ["hum", "t1", "cnt"], statistic_functions)
Esempio n. 4
0
def q1(dic):
    """Print the Question 1 report: Summer, Holiday and All sections.

    For each section the records whose category value is in {1} are taken
    from the first dictionary returned by data.filter_by_feature and
    passed to data.print_details; the last section uses `dic` unfiltered.

    :param dic: dictionary of loaded records
    :return: None
    """
    print("Question1:")
    feature = ["hum", "t1", "cnt"]
    methods = [statistics.sum, statistics.mean, statistics.median]
    sections = [("Summer:", "season"), ("Holiday:", "is_holiday")]
    for title, category in sections:
        print(title)
        matching, _ = data.filter_by_feature(dic, category, {1})
        data.print_details(matching, feature, methods)
    print("All:")
    data.print_details(dic, feature, methods)
Esempio n. 5
0
def population_statistics(feature_description, data1, treatment, target,
                          threshold, is_above, statistic_functions):
    """Print statistics of `target` for records on one side of a threshold.

    :param feature_description: text printed before the statistics
    :param data1: dictionary mapping each feature name to its list of values
    :param treatment: feature whose values are compared to `threshold`
    :param target: feature passed (wrapped in a list) to data.print_details
    :param threshold: value the treatment values are compared to
    :param is_above: truthy keeps rows with value > threshold,
        falsy keeps rows with value <= threshold
    :param statistic_functions: forwarded unchanged to data.print_details
    :return: None
    """
    print(feature_description)
    dic = {key: [] for key in data1}
    for row, value in enumerate(data1[treatment]):
        keep = value > threshold if is_above else value <= threshold
        if keep:
            # Copy the whole record. The keys must come from `data1`; the
            # original above-branch iterated the `data` module by mistake.
            for key in data1:
                dic[key].append(data1[key][row])
    data.print_details(dic, [target], statistic_functions)
Esempio n. 6
0
def Q1(data):
    """
    Print the Question 1 report for the Summer, Holiday and All sections.

    Each section title is printed followed by the output of
    dt.print_details for the section's records.
    :param data: dict
    :return: none
    """

    print("Question 1:")
    features = ["hum", "t1", "cnt"]

    # (title, records) pairs in print order; the filtered sections use the
    # first element returned by dt.filter_by_feature.
    sections = [
        ("Summer", dt.filter_by_feature(data, "season", {1})[0]),
        ("Holiday", dt.filter_by_feature(data, "is_holiday", {1})[0]),
        ("All", data),
    ]

    for title, records in sections:
        print(f"{title}:")
        dt.print_details(records, features, stats.statistic_functions)
Esempio n. 7
0
def population_statistics(feature_description, data, treatment, target,
                          threshold, is_above, statistic_functions):
    """Print statistics for the records on one side of a threshold.

    Collects, for every key of `data`, the values of the rows whose
    `treatment` value is > threshold (is_above truthy) or <= threshold
    (is_above falsy), and passes the result to print_details.

    `feature_description` is accepted but not used by this implementation.

    :return: None
    """
    selected = {feature: [] for feature in data}
    for row, treatment_value in enumerate(data[treatment]):
        if is_above:
            keep = treatment_value > threshold
        else:
            keep = treatment_value <= threshold
        if keep:
            # copy every feature of the matching row
            for feature in data:
                selected[feature].append(data[feature][row])

    print_details(selected, target, statistic_functions)
Esempio n. 8
0
def question1(data):
    """Print the Question 1 report: Summer, Holiday and All sections.

    The Summer and Holiday sections use the first dictionary returned by
    filter_by_feature for the value list [1]; All uses `data` unfiltered.

    :param data: dictionary of loaded records
    :return: None
    """
    features = ['hum', 't1', 'cnt']
    statistic_functions = ['sum', 'mean', 'median']

    print("Question 1:")
    for title, feature in (("Summer:", 'season'), ("Holiday:", 'is_holiday')):
        print(title)
        matching, _ = filter_by_feature(data, feature, [1])
        print_details(matching, features, statistic_functions)
    print("All:")
    print_details(data, features, statistic_functions)
Esempio n. 9
0
def main(argv):
    """
    Produce the answers to Q1 and Q2 from the user's input.

    :param argv: argument list; argv[1] and argv[2] are handed to
        data.load_data, and argv[2] is also split on ", " into feature names
    :return: none
    """
    # Q1
    print('Question 1:')
    features = argv[2].split(", ")
    # keep the continuous variables only
    for categorical in ("season", "is_holiday"):
        features.remove(categorical)
    stats = [statistics.sum, statistics.mean, statistics.med]
    data_all = data.load_data(argv[1], argv[2])

    for title, feature in (("Summer:", "season"), ("Holiday:", "is_holiday")):
        matching, _ = data.filter_by_features(data_all, feature, {1})
        print(title)
        data.print_details(matching, features, stats)
    print("All:")
    data.print_details(data_all, features, stats)

    # Q2
    lim_temp = 13
    print("\nQuestion 2:")
    data_winter, _ = data.filter_by_features(data_all, 'season', {3})
    data_winter_holiday, data_winter_weekday = data.filter_by_features(
        data_winter, 'is_holiday', {1})
    for dropped in ("hum", "t1"):
        features.remove(dropped)
    stats.remove(statistics.sum)

    populations = (
        ("winter holiday records", data_winter_holiday),
        ("winter weekday records", data_winter_weekday),
    )
    # first pass: 13 degrees or under (is_above=0); second pass: above 13
    for heading, is_above in (("If t1<=13.0, then:", 0),
                              ("If t1>13.0, then:", 1)):
        print(heading)
        for description, records in populations:
            statistics.population_statistics(description, records, "t1",
                                             "cnt", lim_temp, is_above, stats)
Esempio n. 10
0
def main(argv):
    """Run the program: print the Question 1 and Question 2 reports.

    argv[1] is handed to data.load_data as the source and argv[2] is a
    ", "-separated string of feature names,
    e.g. "london_sample.csv", ['season', 't1', 'is_holiday', 'cnt', 'hum'].
    """

    def winter_cnt_stats(records, is_above):
        # One Question 2 call: 'cnt' statistics split on t1 around 13.0.
        statistics.population_statistics(
            'cnt', records, 't1', 'cnt', 13.0, is_above,
            [statistics.mean, statistics.median])

    #  Q1
    print(argv)
    d = data.load_data(argv[1], argv[2].split(', '))
    print("Question 1:")
    for title, feature in (("Summer:", 'season'), ("Holiday:", 'is_holiday')):
        print(title)
        matching, _ = data.filter_by_feature(d, feature, [1])
        data.print_details(
            matching, ['hum', 't1', 'cnt'],
            [statistics.sum, statistics.mean, statistics.median])
    print("All:")
    data.print_details(d, ['hum', 't1', 'cnt'],
                       [statistics.sum, statistics.mean, statistics.median])

    #  Q2
    print("")
    print("Question 2:")
    print("If t1<=13.0, then:")
    print("Winter holiday records:")
    # The data is loaded a second time here, after the headers are printed.
    data_main = data.load_data(argv[1], argv[2].split(', '))
    winter_records, _ = data.filter_by_feature(data_main, 'season', [3])
    winter_holiday, is_not_holiday = data.filter_by_feature(
        winter_records, 'is_holiday', [1])
    winter_cnt_stats(winter_holiday, False)
    print("Winter weekday records:")
    winter_cnt_stats(is_not_holiday, False)

    print("If t1>13.0, then:")
    print("Winter holiday records:")
    winter_cnt_stats(winter_holiday, True)
    print("Winter weekday records:")
    winter_cnt_stats(is_not_holiday, True)
Esempio n. 11
0
def main(argv):
    """Load the data and print the Question 1 and Question 2 reports.

    argv[1] is handed to data.load_data as the source; argv[2] is a
    ", "-separated feature string. The code relies on the feature order:
    the first three features are summarised in Question 1, features[3] and
    features[4] are the columns filtered with SUMMER/WINTER and HOLIDAY,
    and features[1] / features[2] are the treatment / target of Question 2.
    """
    # loading the csv, the statistic functions and features for the program
    features: list = argv[2].split(", ")
    dictionary = data.load_data(argv[1], features)
    all_statistics = [statistics.sum, statistics.mean, statistics.median]
    continuous_features = features[:3]

    # ---------------------------- QUESTION 1 ----------------------------
    # The second dictionary returned by the filter is not needed here.
    data_summer, _ = data.filter_by_feature(dictionary, features[3], SUMMER)
    data_holiday, _ = data.filter_by_feature(dictionary, features[4], HOLIDAY)
    print("Question 1:")
    for title, records in (("Summer:", data_summer),
                           ("Holiday:", data_holiday),
                           ("All:", dictionary)):
        print(title)
        data.print_details(records, continuous_features, all_statistics)

    # ---------------------------- QUESTION 2 ----------------------------
    descriptions = ["Winter holiday records:", "Winter weekday records:"]
    data_winter, _ = data.filter_by_feature(dictionary, features[3], WINTER)
    winter_holiday, winter_weekday = data.filter_by_feature(
        data_winter, features[4], HOLIDAY)
    populations = [winter_holiday, winter_weekday]
    mean_and_median = [statistics.mean, statistics.median]
    signs = ["<=", ">"]
    treatment = features[1]
    target = features[2]

    print("\nQuestion 2:")
    # The outer index doubles as the is_above flag (0 pairs with "<=").
    for is_above in range(SECTIONS):
        print(f"If {treatment}{signs[is_above]}13.0, then:")
        for which in range(SECTIONS):
            data.population_statistics(descriptions[which],
                                       populations[which], treatment, target,
                                       THRESHOLD, is_above, mean_and_median)
Esempio n. 12
0
def main(argv):
    """Print the Question 1 and Question 2 reports.

    :param argv: argument list; argv[1] is handed to data_lib.load_data as
        the source, argv[2] is a ", "-separated string of feature names
    :return: None
    """
    print("Question 1:")
    csv_path = argv[1]
    features = argv[2].split(", ")
    data = data_lib.load_data(csv_path, features)
    statistic_functions = [statistics.sum, statistics.mean, statistics.median]

    for title, feature in (("Summer:", "season"), ("Holiday:", "is_holiday")):
        matching, _ = data_lib.filter_by_feature(data, feature, [1])
        print(title)
        data_lib.print_details(matching, ["hum", "t1", "cnt"],
                               statistic_functions)

    print("All:")
    data_lib.print_details(data, ["hum", "t1", "cnt"], statistic_functions)

    print("Question 2:")
    t_threshold = 13
    statistic_func = [statistics.mean, statistics.median]
    print("If t1<=13.0, then:")

    winter, _ = data_lib.filter_by_feature(data, "season", [3])
    winter_holiday, winter_weekday = data_lib.filter_by_feature(
        winter, "is_holiday", [1])
    populations = (("Winter holiday records", winter_holiday),
                   ("Winter weekday records", winter_weekday))

    def report(is_above):
        # Print each population's title, then its statistics.
        for description, records in populations:
            print(description + ":")
            statistics.population_statistics(description, records, "t1",
                                             ["cnt"], t_threshold, is_above,
                                             statistic_func)

    report(False)
    print("If t1>13.0, then:")
    report(True)
Esempio n. 13
0
def main(argv):
    """ Calculates and prints statistic values for features of records from the
        loaded data.

        :parameter:
        argv -- list of arguments supplied (file to run, csv file address, features to get from the csv)
        :returns: None
    """
    # Question 1
    # Saves the features given in a list
    features = argv[2].split(sep=", ")
    the_data = data.load_data(argv[1], features)
    statistic_functions = [sum, mean, median]
    # Saves the relevant records
    summer_data, _ = data.filter_by_feature(the_data, "season", [1])
    holiday_data, _ = data.filter_by_feature(the_data, "is_holiday", [1])
    print("Question 1:")
    for title, records in (("Summer:", summer_data),
                           ("Holiday:", holiday_data),
                           ("All:", the_data)):
        print(title)
        data.print_details(records, ["hum", "t1", "cnt"], statistic_functions)

    # Question 2
    # Header now ends with a colon, consistent with "Question 1:" above.
    print("\nQuestion 2:")
    print("If t1<=13.0, then:")
    # Saves the relevant records
    winter_data, _ = data.filter_by_feature(the_data, "season", [3])
    w_h_data, not_w_h_data = data.filter_by_feature(winter_data, "is_holiday",
                                                    [1])
    populations = (("Winter holiday records:", w_h_data),
                   ("Winter weekday records:", not_w_h_data))

    def report(is_above):
        # Statistics for both winter populations, without the first
        # function in statistic_functions.
        for description, records in populations:
            population_statistics(description, records, "t1", ["cnt"],
                                  THRESHOLD, is_above, statistic_functions[1:])

    report(0)
    print("If t1>13.0, then:")
    report(1)
Esempio n. 14
0
def main(argv):
    """
    Main function of the program: prints the Question 1 and Question 2 reports.
    :param argv: argv[0] = /home/student/your_path/main.py, argv[1] = /home/student/your_path/ london.csv,
    argv[2] = "hum, t1, cnt, season, is_holiday"
    :return: none
    """
    # question 1
    data_dict = data.load_data(argv[1], argv[2])
    statistic_functions = [
        statistics.sum_f, statistics.mean, statistics.median
    ]
    print("Question 1:")
    feature_list = ["hum", "t1", "cnt"]
    for title, feature in (('Summer', 'season'), ('Holiday', 'is_holiday')):
        matching, _ = data.filter_by_value(data_dict, feature, {1})
        data.print_details(matching, feature_list, statistic_functions, title)
    data.print_details(data_dict, feature_list, statistic_functions, 'All')
    print()  # before question 2

    # question 2
    print("Question 2:")
    title_list = ["Winter holiday records:", "Winter weekday records:"]
    print("If t1<=13.0, then:")
    data_winter, _ = data.filter_by_value(data_dict, 'season', {3})
    winter_holiday, winter_weekday = data.filter_by_value(
        data_winter, 'is_holiday', {1})
    dict_list = [winter_holiday, winter_weekday]

    def report(is_above):
        # One population_statistics call per (title, records) pair.
        for title, records in zip(title_list, dict_list):
            statistics.population_statistics(title, records, 't1', 'cnt',
                                             13.0, is_above,
                                             statistic_functions)

    report(0)
    print("If t1>13.0, then:")
    report(1)
Esempio n. 15
0
 def print_details(self, features, statistic_functions):
     """Forward this object's dataset records to data.print_details.

     :param features: forwarded unchanged to data.print_details
     :param statistic_functions: forwarded unchanged to data.print_details
     """
     records = self.dataset.data
     data.print_details(records, features, statistic_functions)
Esempio n. 16
0
def main(argv):
    """Load the data and print the Question 1 and Question 2 reports.

    :param argv: argument list; argv[1] and argv[2] are handed to
        dt.load_data unchanged
    :return: None
    """

    data = dt.load_data(argv[1], argv[2])
    # description word -> (feature name, accepted values)
    dc = {
        "spring": ("season", [0]),
        "summer": ("season", [1]),
        "autumn": ("season", [2]),
        "winter": ("season", [3]),
        "holiday": ("is_holiday", [1]),
        "weekend": ("is_weekend", [1]),
    }

    print("Question 1:")

    for mode in ("Summer", "Holiday"):
        print(f"{mode}:")
        feature, values = dc[mode.lower()]
        matching = dt.filter_by_feature(data, feature, values)[0]
        dt.print_details(
            matching,
            ["hum", "t1", "cnt"],
            [st.sum, st.mean, st.median],
        )
    print("All:")
    dt.print_details(data, ["hum", "t1", "cnt"], [st.sum, st.mean, st.median])

    print("\nQuestion 2:")

    threshold = 13.0

    winter_feature, winter_values = dc["winter"]
    winter_data, _ = dt.filter_by_feature(data, winter_feature, winter_values)
    holiday_feature, holiday_values = dc["holiday"]
    holiday_data, weekday_data = dt.filter_by_feature(
        winter_data, holiday_feature, holiday_values)

    populations = (
        ("Winter holiday records", holiday_data),
        ("Winter weekday records", weekday_data),
    )
    # "<=" pairs with is_above=False, ">" with is_above=True
    for sign, is_above in (("<=", False), (">", True)):
        print(f"If t1{sign}{threshold}, then:")
        for description, records in populations:
            print(f"{description}:")
            st.population_statistics(
                description,
                records,
                "t1",
                "cnt",
                threshold,
                is_above,
                [st.mean, st.median],
            )
Esempio n. 17
0
def population_statistics(features_description, data, treatment, target,
                          threshold, is_above, statistic_functions):
    """Print statistics of `target` for records on one side of a threshold.

    The original body ignored `treatment`, `threshold` and `is_above` and
    printed statistics over all of `data`. The records are now filtered
    first: rows whose `treatment` value is > threshold are kept when
    `is_above` is truthy, rows with value <= threshold otherwise.

    :param features_description: text printed before the statistics
    :param data: dictionary mapping each feature name to its list of values
    :param treatment: feature whose values are compared to `threshold`
    :param target: forwarded to print_details as the feature(s) to summarise
    :param threshold: value the treatment values are compared to
    :param is_above: selects which side of the threshold is kept
    :param statistic_functions: forwarded unchanged to print_details
    :return: None
    """
    from data import print_details  # non in-function import will cause circular import

    filtered = {feature: [] for feature in data}
    for row, value in enumerate(data[treatment]):
        keep = value > threshold if is_above else value <= threshold
        if keep:
            # copy every feature of the matching row
            for feature in data:
                filtered[feature].append(data[feature][row])

    print(features_description)
    print_details(filtered, target, statistic_functions)