Esempio n. 1
0
 def maxmode(self, list):
     """Remember ``list`` on the instance and return its largest mode.

     The mode is the value that occurs most often.  When several values
     share the highest frequency, the largest of them is returned.

     :param list: iterable of hashable, mutually comparable values.
     :return: the most frequent value (largest one on a tie).
     :raises ValueError: if ``list`` is empty.
     """
     # Imported locally: the private ``statistics._counts`` helper this
     # method used to call is not available on newer Python releases.
     from collections import Counter

     self.list = list
     frequencies = Counter(list)
     if not frequencies:
         raise ValueError("maxmode() requires at least one value")
     # Count once, then keep only the values that reach the top frequency.
     highest = max(frequencies.values())
     return max(value for value, count in frequencies.items()
                if count == highest)
Esempio n. 2
0
def convert_to_timeseries(df):
    """Collapse ``df`` into one row per 'Month of absence' and one-hot encode it.

    For every distinct month the numeric columns are averaged and each
    categorical column is reduced to a single representative value (a most
    frequent value of that month).  The per-month rows are then sorted by
    month, indexed by month, dummy-encoded and a fixed set of dummy columns
    is dropped.

    :param df: input frame; must contain 'Month of absence', every column
        named in ``cat_cols``/``num_cols`` below, plus 'ID' and
        'Time Utilization' (both are dropped by label later on).
    :return: the encoded frame indexed by 'Month of absence'.

    NOTE(review): ``stat._counts`` is a private helper of the module bound
    to ``stat`` (presumably ``statistics``); it is not part of the public
    API and may be missing on newer interpreters -- confirm before reuse.
    """
    # Accumulator for the per-month rows; starts empty with df's columns.
    ts_df = pd.DataFrame(columns=df.columns)
    # Columns summarised by their most frequent value.
    cat_cols = [
        'Day of the week', 'Disciplinary failure', 'Education',
        'Reason for absence', 'Seasons', 'Social drinker', 'Social smoker'
    ]
    # Columns summarised by their mean.
    num_cols = [
        "Transportation expense", "Distance from Residence to Work",
        "Service time", "Age", 'Monthly Utilization', "Work load Average/day ",
        "Hit target", "Son", "Pet", "Weight", "Height", "Body mass index",
        "Absenteeism time in hours"
    ]
    for i in df['Month of absence'].unique():
        # All rows belonging to the current month.
        x = df[df['Month of absence'] == i]
        # Single-row frame holding the numeric means of this month.
        # NOTE(review): relies on np.mean(DataFrame) returning a per-column
        # Series -- verify against the installed pandas version.
        n = pd.DataFrame(np.mean(x[num_cols]).values.reshape(1, -1),
                         columns=num_cols)
        # Single-row frame for the categorical summary of this month.
        c = pd.DataFrame(columns=cat_cols)
        # NOTE: this inner loop reuses the name ``i`` and shadows the month
        # value; ``x`` was already computed above, so the current iteration
        # is unaffected.
        for i in cat_cols:
            # ``_counts`` appears to return the (value, count) pairs tied for
            # the highest frequency; on a tie the first pair's value is used.
            if len(stat._counts(x.loc[:, i])) > 1:
                c.loc[0, i] = stat._counts(x.loc[:, i])[0][0]
            else:
                c.loc[0, i] = stat.mode(x[i])
        # All rows of ``x`` share the same month, so the mean is that month.
        c['Month of absence'] = np.mean(x['Month of absence'])
        #     s=pd.DataFrame(np.mean(df[df['Month of absence']==i]).values.reshape(1,-1),columns=df.columns)
        # Join numeric and categorical parts side by side, then append the
        # resulting row to the accumulator.
        ts_df = pd.concat([ts_df, pd.concat([n, c], axis=1, sort=True)],
                          ignore_index=True,
                          sort=True)
    # Order the monthly rows chronologically and renumber them.
    ts_df = ts_df.sort_values(by='Month of absence')
    ts_df.reset_index(drop=True, inplace=True)
    # These two columns come from df.columns and are not part of the summary.
    ts_df.drop(labels=['ID', 'Time Utilization'], axis=1, inplace=True)
    # Same list as above, re-declared before the dtype conversion.
    cat_cols = [
        'Day of the week', 'Disciplinary failure', 'Education',
        'Reason for absence', 'Seasons', 'Social drinker', 'Social smoker'
    ]
    for i in cat_cols:
        #     ts_df[i]=round(ts_df[i],0)
        # Object dtype makes get_dummies treat the column as categorical.
        ts_df[i] = ts_df[i].astype(object)
    ts = ts_df.set_index(keys='Month of absence')
    # The two social flags are cast back to float64, so they are kept as
    # numeric columns rather than being dummy-encoded.
    ts['Social smoker'] = ts['Social smoker'].astype(np.float64)
    ts['Social drinker'] = ts['Social drinker'].astype(np.float64)
    ts = pd.get_dummies(ts, drop_first=True)
    #     print(ts.head())
    # Remove the weekday and season dummies; this raises if any of the
    # listed dummy columns was not produced by get_dummies.
    ts = ts.drop(columns=[
        'Day of the week_3', 'Day of the week_4', 'Day of the week_5',
        'Day of the week_6', 'Seasons_2', 'Seasons_3', 'Seasons_4'
    ],
                 axis=1)
    return ts
Esempio n. 3
0
def impute_missing_vals(df, num_cols, cat_cols):
    """Fill missing values separately for every ``ID`` group of ``df``.

    Numeric columns are imputed with a KNN imputer (k=5) when the group has
    more than two rows and at least one numeric null.  Categorical columns
    with nulls are forward-filled when ``stat._counts`` reports more than
    one top-frequency value, otherwise the nulls are replaced with
    ``stat.mode`` of the column.

    :param df: input frame; must expose an ``ID`` column.
    :param num_cols: list of numeric column names to impute.
    :param cat_cols: list of categorical column names to impute.
    :return: a new frame with all groups concatenated in ``np.unique(df.ID)``
        order and a fresh integer index.

    NOTE(review): ``stat._counts`` is a private helper of the module bound
    to ``stat`` (presumably ``statistics``) and ``KNN(...).complete`` is the
    API of the ``fancyimpute`` version this was written for -- confirm both
    are available in the target environment.
    """
    # Accumulator for the imputed groups.
    df_full = pd.DataFrame(columns=df.columns)
    for j in np.unique(df.ID):
        # Rows of the current ID, renumbered from 0 so that the imputer's
        # output frame (default index) lines up with them on assignment.
        df_n = df[df.ID == j].reset_index(drop=True)
        # NOTE: ``missing_val`` is computed but never used below.
        missing_val = df_n.isnull().sum()
        # Only the row count ``r`` is used; ``c`` is unused.
        r, c = df_n.shape
        if r > 2:
            # Run the imputer only when some numeric cell is actually null.
            if df_n[num_cols].isnull().sum().sum() > 0:
                df_n[num_cols] = pd.DataFrame(fancyimpute.KNN(k=5).complete(
                    df_n[num_cols]),
                                              columns=num_cols)


#         for i in num_cols:
#             if len(df_n[df_n[i].isnull()])>0:
#                 df_n.loc[df_n[i].isnull(),i]=np.mean(df_n[i])
        # Categorical imputation runs for every group, regardless of size.
        for i in cat_cols:
            if len(df_n[df_n[i].isnull()]) > 0:
                # More than one value tied for most frequent: no single mode,
                # so propagate the previous observation instead.
                if len(stat._counts(df_n.loc[:, i])) > 1:
                    df_n.loc[:, i] = df_n.loc[:, i].fillna(method='ffill')
                    df_n[i] = df_n[i].astype(object)
                else:
                    # Unique mode: use it for every missing cell.
                    df_n.loc[df_n[i].isnull(), i] = stat.mode(df_n[i])
        df_full = pd.concat([df_full, df_n], ignore_index=True)
    return df_full
Esempio n. 4
0
def mode_making(variable):
    """Return the most frequent value of ``variable`` and how often it occurs.

    When several values share the highest frequency, the one encountered
    first in ``variable`` wins.

    The pair is returned with its native Python types.  The previous
    implementation pushed ``(value, count)`` through a homogeneous array,
    which silently turned the count into a float or a string whenever the
    value was not an integer.

    :param variable: iterable of hashable values.
    :return: tuple ``(mode, quantity)``.
    :raises IndexError: if ``variable`` is empty.
    """
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter

    ranked = Counter(variable).most_common(1)
    if not ranked:
        raise IndexError("mode_making() requires at least one value")
    mode, quantity = ranked[0]
    return mode, quantity
Esempio n. 5
0
def generate_csv(size, iterations, timeout, data):
    """Write the benchmark summary of an N-puzzle run to a CSV file.

    The file is created at ``./stats/{size}Puzzle_{iterations}Iterations.csv``
    (the ``stats`` directory must already exist).  For every algorithm one
    row per heuristic is written with min/max/mean/median/modes of the
    solution lengths and of the run times, followed by the number of
    successful and failed runs.

    :param size: puzzle size, used in the title line and the file name.
    :param iterations: number of runs per heuristic; failures are computed
        as ``iterations`` minus the number of successful runs.
    :param timeout: timeout in seconds, only reported in the title line.
    :param data: mapping ``algorithm -> iterable of heuristic records``.
        Each record is indexable as ``(name, solutions, times)`` where
        ``solutions`` and ``times`` are parallel sequences and a time of
        ``-1`` marks a run that did not succeed.
    """
    puzzle_info = 'NPuzzle Problem,size = {}, iterations = {}, timeout = {}s\n\n'.format(
        size, iterations, timeout)
    csv_header = 'Algorithm,Heuristic,,Length_Min,Length_Max,Length_Mean,Length_Median,Length_Modes,,Time_Min,Time_Max,Time_Mean,Time_Median,Time_Modes,,Success,Timed_Up\n'
    row_template = ',{},,{},{},{},{},"{}",,{},{},{},{},"{}",,{},{}\n'

    # ``with`` guarantees the file is closed even if a record is malformed.
    with open('./stats/{}Puzzle_{}Iterations.csv'.format(size, iterations),
              'w') as csv_file:
        csv_file.write(puzzle_info + csv_header)
        for algorithm, algorithm_heuristics in data.items():
            csv_file.write('{}'.format(algorithm))
            for heuristic_data in algorithm_heuristics:
                # Pair every solution with its own time by position.  Looking
                # runs up by value would attribute a failed run to an earlier
                # successful run that happens to hold an equal solution.
                solved = [(solution, elapsed)
                          for solution, elapsed in zip(heuristic_data[1],
                                                       heuristic_data[2])
                          if elapsed != -1]
                lengths = [len(solution) for solution, _ in solved]
                times = [elapsed for _, elapsed in solved]

                success_amount = len(times)
                error_amount = iterations - success_amount
                csv_file.write(row_template.format(
                    heuristic_data[0],
                    *(_column_stats(lengths) + _column_stats(times)
                      + [success_amount, error_amount])))


def _column_stats(values):
    """Return ``[min, max, mean, median, modes]`` of ``values`` ('-' x5 if empty)."""
    if not values:
        return ['-'] * 5
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter

    frequencies = Counter(values)
    peak = max(frequencies.values())
    # Every value tied for the highest frequency, in first-seen order.
    modes = [value for value, count in frequencies.items() if count == peak]
    return [min(values), max(values), statistics.mean(values),
            statistics.median(values), modes]
Esempio n. 6
0
def find_max_mode(list1):
    """Return the most frequent value in ``list1``; ties resolve to the largest.

    :param list1: iterable of hashable, mutually comparable values.
    :return: the mode, or the largest of the modes when several values
        share the highest frequency.
    :raises ValueError: if ``list1`` is empty.
    """
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter

    frequencies = Counter(list1)
    if not frequencies:
        raise ValueError("find_max_mode() arg is an empty sequence")
    top_count = max(frequencies.values())
    return max(value for value, count in frequencies.items()
               if count == top_count)
def find_max_mode(list_data):
    """Return the mode of ``list_data``, choosing the largest value on a tie.

    :param list_data: iterable of hashable, mutually comparable values.
    :return: the single most frequent value, or the maximum among the
        values tied for the highest frequency.
    :raises ValueError: if ``list_data`` is empty.
    """
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter

    # ``most_common()`` lists (value, count) pairs, highest count first.
    ranked = Counter(list_data).most_common()
    if not ranked:
        raise ValueError("find_max_mode() arg is an empty sequence")
    best_count = ranked[0][1]
    return max(value for value, count in ranked if count == best_count)
Esempio n. 8
0
def around_module(request):
    """Generator that writes a one-row CSV summary after the ``yield``.

    Everything after the ``yield`` runs when the generator is resumed: it
    summarises ``lengths`` and ``times`` and writes
    ``./stats/{module name}-{now}.csv`` with a title line, a header and a
    single 'Iterative Deepening Search' row.

    :param request: object exposing ``module.__name__``, used to build the
        output file name (presumably a pytest ``request`` -- confirm).

    NOTE(review): ``lengths`` and ``times`` are local lists that nothing
    outside this generator can reach, so they are still empty after the
    ``yield`` and every statistic is written as '-'.  If the tests are
    meant to fill them, they probably need to be module globals -- confirm.
    NOTE(review): ``now`` and ``aux_size`` are not defined in this block and
    must exist at module level.
    """
    global csv_file
    global csv_info
    global csv_header
    global size
    global iterations

    def summarize(samples):
        """Return (min, max, mean, median, modes) of ``samples`` as strings."""
        if not samples:
            return ('-',) * 5
        # Imported locally: the private ``statistics._counts`` helper used
        # before is not available on newer Python releases.
        from collections import Counter

        frequencies = Counter(samples)
        peak = max(frequencies.values())
        # Every value tied for the highest frequency, in first-seen order.
        modes = [value for value, count in frequencies.items()
                 if count == peak]
        return (str(min(samples)), str(max(samples)),
                str(statistics.mean(samples)),
                str(statistics.median(samples)), str(modes))

    lengths = []
    times = []
    yield

    length_summary = summarize(lengths)
    time_summary = summarize(times)

    csv_info = 'NPuzzle Problem.,size = ' + str(aux_size) + '\n\n'
    csv_header = 'Algorithm,Heuristic,Iterations,,Length_Min,Length_Max,Length_Mean,Length_Median,Length_Modes,,Time_Min,Time_Max,Time_Mean,Time_Median,Time_Modes,,Timed_Up\n'
    csv_row = '{},{},{},,{},{},{},{},"{}",,{},{},{},{},"{}",,{}\n'.format(
        'Iterative Deepening Search', '-', iterations, *length_summary,
        *time_summary, iterations - len(lengths))

    # ``with`` closes the file even if a write fails; ``csv_file`` is still
    # bound as a module global, as before.
    with open('./stats/{}-{}.csv'.format(request.module.__name__, str(now)),
              'w') as csv_file:
        csv_file.write(csv_info)
        csv_file.write(csv_header)
        csv_file.write(csv_row)
Esempio n. 9
0
def find_max_mode(list1):
    """Find the mode of ``list1``, returning a result even when there is a tie.

    If several values are tied for the highest frequency, the largest of
    them is returned.

    :param list1: iterable of hashable, mutually comparable values.
    :return: the most frequent value (largest one on a tie).
    :raises ValueError: if ``list1`` is empty.
    """
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter
    from itertools import takewhile

    ranked = Counter(list1).most_common()
    if not ranked:
        raise ValueError("find_max_mode() arg is an empty sequence")
    top_count = ranked[0][1]
    # ``ranked`` is sorted by descending count, so the tied leaders form
    # its prefix.
    leaders = takewhile(lambda pair: pair[1] == top_count, ranked)
    return max(value for value, _ in leaders)
Esempio n. 10
0
    def get_cluster_class_lable(self, cluster):
        """Return the majority class label of ``cluster``.

        When several labels are tied for the highest frequency, the
        smallest label is returned.

        :param cluster: iterable of hashable, mutually comparable labels.
        :return: the majority label (smallest one on a tie).
        :raises ValueError: if ``cluster`` is empty.
        """
        # Imported locally: the private ``statistics._counts`` helper used
        # before is not available on newer Python releases.
        from collections import Counter

        label_counts = Counter(cluster)
        if not label_counts:
            raise ValueError(
                "get_cluster_class_lable() requires a non-empty cluster")
        majority = max(label_counts.values())
        return min(label for label, count in label_counts.items()
                   if count == majority)
Esempio n. 11
0
def find_max_mode(list1):
    """Return the mode of ``list1``; on a tie return the largest tied value.

    Example: for ``[1, 1, 2, 2]`` both 1 and 2 occur twice, so 2 is
    returned.

    :param list1: iterable of hashable, mutually comparable values.
    :return: the most frequent value (largest one on a tie).
    :raises ValueError: if ``list1`` is empty.
    """
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter

    occurrences = Counter(list1)
    if not occurrences:
        raise ValueError("find_max_mode() arg is an empty sequence")
    highest = max(occurrences.values())
    # All values that reach the highest frequency, e.g. [1, 2] above.
    tied = [value for value, count in occurrences.items() if count == highest]
    return tied[0] if len(tied) == 1 else max(tied)
Esempio n. 12
0
def find_max_mode(x: list):
    """
    Calculate the mode and break ties by choosing the max.

    :param x: values to inspect; they must be hashable and, when tied,
        mutually comparable.
    :return: the most frequent value, or the largest among the values that
        share the highest frequency.
    :raises ValueError: if ``x`` is empty.
    """
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter

    tally = Counter(x)
    if not tally:
        raise ValueError("find_max_mode() arg is an empty sequence")
    peak = max(tally.values())
    return max(candidate for candidate in tally if tally[candidate] == peak)
def find_max_mode(list1):
    """Return the mode of ``list1`` with a special rule for ties.

    * One value is strictly most frequent: that value is returned.
    * Several values are tied and ``0`` is one of them: the largest tied
      value is returned.
    * Several values are tied and ``0`` is not one of them: ``0`` is
      returned, and the tied values are printed between rows of asterisks
      as they are collected.

    NOTE(review): returning 0 for a tie that does not involve 0 is kept
    exactly as found -- it looks like a deliberate "undecided" marker, but
    confirm with the caller.

    :param list1: iterable of hashable, mutually comparable values.
    :return: the resolved mode as described above.
    :raises ValueError: if ``list1`` is empty.
    """
    # Imported locally: the private ``statistics._counts`` helper used
    # before is not available on newer Python releases.
    from collections import Counter

    frequencies = Counter(list1)
    if not frequencies:
        raise ValueError("find_max_mode() arg is an empty sequence")
    peak = max(frequencies.values())
    # Tied values in first-seen order.
    modes = [value for value, count in frequencies.items() if count == peak]
    if len(modes) == 1:
        return modes[0]

    # Report each growing prefix of tied values until 0 shows up.
    for end in range(2, len(modes) + 1):
        collected = modes[:end]
        if 0 in collected:
            break
        print('********')
        print(collected)
        print('********')
    return max(modes) if 0 in modes else 0
Esempio n. 14
0
def relabel(window_size,
            src_path=r"D:\Academic\data_nvspl\SRCID_LAKE017_replaced_labels.txt",
            dst_path=r"D:\Academic\data_nvspl\SRCID_LAKE017_re_labels.txt"):
    """Collapse a label file into one label per window of ``window_size`` lines.

    The first whitespace-separated token of every non-blank input line is
    read as an integer label.  The labels are cut into consecutive windows
    of ``window_size`` items (a trailing incomplete window is discarded) and
    the most frequent label of each window is written to the output file,
    one per line.  When a window has several labels tied for the highest
    frequency, the window index is printed and the largest tied label is
    used.  Finally a counter of the written labels is printed.

    :param window_size: number of consecutive labels merged into one.
    :param src_path: file to read the original labels from.
    :param dst_path: file to write the merged labels to (overwritten).
    :raises ValueError: if ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Blank lines carry no label and are skipped instead of crashing.
    with open(src_path, 'r') as source:
        labels = [int(line.split()[0]) for line in source if line.strip()]

    new_labels = []
    with open(dst_path, 'w') as target:
        for window_index in range(len(labels) // window_size):
            start = window_size * window_index
            window = labels[start:start + window_size]
            frequencies = collections.Counter(window)
            peak = max(frequencies.values())
            tied = [value for value, count in frequencies.items()
                    if count == peak]
            if len(tied) > 1:
                # Report which window needed the tie-break.
                print(window_index)
            label = max(tied)
            new_labels.append(label)
            target.write(str(label) + '\n')

    # print some information about new labels
    print('Labels counter after re-label: ')
    print(collections.Counter(new_labels))
Esempio n. 15
0
def _modes(d):
    """Return every value of ``d`` that occurs with the highest frequency.

    The values are listed in the order in which they first appear in ``d``.

    :param d: iterable of hashable values.
    :return: list of the most frequent values; empty if ``d`` is empty.
    """
    # Imported locally: the private ``_counts`` helper of the statistics
    # module used before is not available on newer Python releases.
    from collections import Counter

    frequencies = Counter(d)
    if not frequencies:
        return []
    peak = max(frequencies.values())
    return [value for value, count in frequencies.items() if count == peak]
Esempio n. 16
0
def around_module(request):
    """Generator that resets the result globals, then writes a CSV summary.

    Before the ``yield`` the per-heuristic result lists are reset to empty
    lists and the best results to ``None``.  After the ``yield`` one CSV row
    per heuristic (Manhattan, Misplaced Tiles, Gaschnig,
    Max_Manhattan_Gaschnig) is written to
    ``./stats/{module name}-{timestamp}.csv`` with min/max/mean/median/modes
    of the solution lengths and run times, the number of runs that produced
    no length, and the best result.

    Each row reports the modes of its own heuristic.  Previously the same
    two mode variables were overwritten for every heuristic, so all four
    rows showed the modes of the last one.

    :param request: object exposing ``module.__name__``, used to build the
        output file name (presumably a pytest ``request`` -- confirm).

    NOTE(review): ``aux_size`` and the ``*_iterations`` counters are only
    read here and must be set elsewhere in the module.
    """
    global csv_file
    global csv_info
    global csv_header
    global manhattan_lengths
    global manhattan_times
    global misplaced_lengths
    global misplaced_times
    global gaschnig_lengths
    global gaschnig_times
    global max_man_gasch_lengths
    global max_man_gasch_times
    global aux_size
    global aux_iterations
    global manhattan_iterations
    global gaschnig_iterations
    global misplaced_iterations

    global manhattan_best_result
    global misplaced_best_result
    global gaschnig_best_result
    global max_man_gasch_best_result

    def summarize(samples):
        """Return (min, max, mean, median, modes) of ``samples`` as strings."""
        if not samples:
            return ('-',) * 5
        # Imported locally: the private ``statistics._counts`` helper used
        # before is not available on newer Python releases.
        from collections import Counter

        frequencies = Counter(samples)
        peak = max(frequencies.values())
        # Every value tied for the highest frequency, in first-seen order.
        modes = [value for value, count in frequencies.items()
                 if count == peak]
        return (str(min(samples)), str(max(samples)),
                str(statistics.mean(samples)),
                str(statistics.median(samples)), str(modes))

    manhattan_lengths = []
    manhattan_times = []
    misplaced_lengths = []
    misplaced_times = []
    gaschnig_lengths = []
    gaschnig_times = []
    max_man_gasch_lengths = []
    max_man_gasch_times = []

    manhattan_best_result = None
    misplaced_best_result = None
    gaschnig_best_result = None
    max_man_gasch_best_result = None

    yield

    # Five length columns followed by five time columns, per heuristic.
    manhattan_stats = summarize(manhattan_lengths) + summarize(manhattan_times)
    misplaced_stats = summarize(misplaced_lengths) + summarize(misplaced_times)
    gaschnig_stats = summarize(gaschnig_lengths) + summarize(gaschnig_times)
    max_man_gasch_stats = (summarize(max_man_gasch_lengths)
                           + summarize(max_man_gasch_times))

    # A heuristic that never produced a best result is reported as [].
    if manhattan_best_result is None:
        manhattan_best_result = []
    if misplaced_best_result is None:
        misplaced_best_result = []
    if gaschnig_best_result is None:
        gaschnig_best_result = []
    if max_man_gasch_best_result is None:
        max_man_gasch_best_result = []

    csv_info = 'NPuzzle Problem.,size = ' + str(aux_size) + '\n\n'
    csv_header = 'Algorithm,Heuristic,Iterations,,Length_Min,Length_Max,Length_Mean,Length_Median,Length_Modes,,Time_Min,Time_Max,Time_Mean,Time_Median,Time_Modes,,Timed_Up,,Best_Result\n'
    row_template = '{},{},{},,{},{},{},{},"{}",,{},{},{},{},"{}",,{},,"{}"\n'

    now = datetime.now()
    # ``with`` closes the file even if a write fails; ``csv_file`` is still
    # bound as a module global, as before.
    with open('./stats/{}-{}.csv'.format(request.module.__name__, str(now)),
              'w') as csv_file:
        csv_file.write(csv_info)
        csv_file.write(csv_header)
        # manhattan row (the only one carrying the algorithm name)
        csv_file.write(row_template.format(
            'Greedy Best First Search', 'Manhattan', manhattan_iterations,
            *manhattan_stats,
            manhattan_iterations - len(manhattan_lengths),
            manhattan_best_result))
        # misplaced tiles row
        csv_file.write(row_template.format(
            '', 'Misplaced Tiles', misplaced_iterations,
            *misplaced_stats,
            misplaced_iterations - len(misplaced_lengths),
            misplaced_best_result))
        # gaschnig row
        csv_file.write(row_template.format(
            '', 'Gaschnig', gaschnig_iterations,
            *gaschnig_stats,
            gaschnig_iterations - len(gaschnig_lengths),
            gaschnig_best_result))
        # max_manhattan_gaschnig row
        csv_file.write(row_template.format(
            '', 'Max_Manhattan_Gaschnig', max_man_gasch_iterations,
            *max_man_gasch_stats,
            max_man_gasch_iterations - len(max_man_gasch_lengths),
            max_man_gasch_best_result))