def maxmode(self, list):
    """Return the most frequent value of ``list``, taking the largest on ties.

    The input is also stored on the instance as ``self.list``.

    Args:
        list: iterable of hashable, mutually comparable values. The name
            shadows the builtin but is kept so keyword callers still work.

    Returns:
        The largest of the values that share the highest frequency.

    Raises:
        ValueError: if ``list`` is empty.
    """
    from collections import Counter

    self.list = list

    # Tally once. The previous code called the private ``_counts`` helper on
    # every loop iteration, and its ``_counts(list) == 1`` test compared a
    # list with an int, so the single-mode branch could never be taken.
    frequencies = Counter(list)
    if not frequencies:
        raise ValueError("maxmode() arg is an empty sequence")

    highest = max(frequencies.values())
    return max(value for value, count in frequencies.items()
               if count == highest)
def convert_to_timeseries(df):
    """Collapse the records in ``df`` to one row per month and one-hot encode it.

    For every distinct 'Month of absence' value the numeric columns are
    averaged and each categorical column is reduced to its most frequent
    value. The monthly rows are concatenated, sorted by month, stripped of
    the 'ID' and 'Time Utilization' columns, indexed by month and passed
    through ``pd.get_dummies(drop_first=True)``. A fixed list of dummy
    columns is dropped before returning.

    :param df: DataFrame holding 'Month of absence' plus every column named
        in ``cat_cols`` and ``num_cols`` below.
    :return: the encoded DataFrame indexed by 'Month of absence'.
    """
    # Seeded with df's columns, so 'ID' and 'Time Utilization' exist for the
    # drop() further down (assumes df has both columns - TODO confirm).
    ts_df = pd.DataFrame(columns=df.columns)
    cat_cols = [
        'Day of the week', 'Disciplinary failure', 'Education',
        'Reason for absence', 'Seasons', 'Social drinker', 'Social smoker'
    ]
    num_cols = [
        "Transportation expense", "Distance from Residence to Work",
        "Service time", "Age", 'Monthly Utilization', "Work load Average/day ",
        "Hit target", "Son", "Pet", "Weight", "Height", "Body mass index",
        "Absenteeism time in hours"
    ]
    for i in df['Month of absence'].unique():
        # All records of the current month.
        x = df[df['Month of absence'] == i]
        # One-row frame holding the mean of every numeric column.
        # NOTE(review): relies on np.mean(DataFrame) returning a per-column
        # Series - confirm against the installed pandas version.
        n = pd.DataFrame(np.mean(x[num_cols]).values.reshape(1, -1),
                         columns=num_cols)
        c = pd.DataFrame(columns=cat_cols)
        # NOTE(review): this inner loop reuses the name `i` of the outer
        # loop; the outer `i` is not read again after this point.
        for i in cat_cols:
            # On a tie the first entry of the counts table is used,
            # otherwise the unique mode.
            # NOTE(review): `stat` is presumably the stdlib `statistics`
            # module; its private `_counts` helper is not available on
            # Python 3.8+ - confirm the target interpreter.
            if len(stat._counts(x.loc[:, i])) > 1:
                c.loc[0, i] = stat._counts(x.loc[:, i])[0][0]
            else:
                c.loc[0, i] = stat.mode(x[i])
        c['Month of absence'] = np.mean(x['Month of absence'])
        # s=pd.DataFrame(np.mean(df[df['Month of absence']==i]).values.reshape(1,-1),columns=df.columns)
        # Append this month's combined numeric + categorical row.
        ts_df = pd.concat([ts_df, pd.concat([n, c], axis=1, sort=True)],
                          ignore_index=True,
                          sort=True)
    ts_df = ts_df.sort_values(by='Month of absence')
    ts_df.reset_index(drop=True, inplace=True)
    ts_df.drop(labels=['ID', 'Time Utilization'], axis=1, inplace=True)
    # Same list as above, re-declared before the dtype conversion.
    cat_cols = [
        'Day of the week', 'Disciplinary failure', 'Education',
        'Reason for absence', 'Seasons', 'Social drinker', 'Social smoker'
    ]
    for i in cat_cols:
        # ts_df[i]=round(ts_df[i],0)
        # Cast the categorical columns to object dtype before get_dummies.
        ts_df[i] = ts_df[i].astype(object)
    ts = ts_df.set_index(keys='Month of absence')
    # These two columns are cast back to float64.
    ts['Social smoker'] = ts['Social smoker'].astype(np.float64)
    ts['Social drinker'] = ts['Social drinker'].astype(np.float64)
    ts = pd.get_dummies(ts, drop_first=True)
    # print(ts.head())
    # Raises KeyError when any of these dummy columns was not generated.
    ts = ts.drop(columns=[
        'Day of the week_3', 'Day of the week_4', 'Day of the week_5',
        'Day of the week_6', 'Seasons_2', 'Seasons_3', 'Seasons_4'
    ],
                 axis=1)
    return ts
def impute_missing_vals(df, num_cols, cat_cols):
    """Fill missing values separately for every ``ID`` group of ``df``.

    Groups with more than two rows get their numeric columns imputed with
    ``fancyimpute.KNN(k=5)`` and their categorical columns filled either by
    forward fill (several equally common values) or with the single mode.
    The per-ID frames are concatenated into a new DataFrame.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.

    :param df: DataFrame with an ``ID`` column.
    :param num_cols: names of the numeric columns to impute.
    :param cat_cols: names of the categorical columns to impute.
    :return: DataFrame with the same columns as ``df`` holding the
        processed groups.
    """
    df_full = pd.DataFrame(columns=df.columns)
    for j in np.unique(df.ID):
        # Rows of the current ID, re-indexed from 0.
        df_n = df[df.ID == j].reset_index(drop=True)
        # NOTE(review): `missing_val` and `c` are assigned but never read.
        missing_val = df_n.isnull().sum()
        r, c = df_n.shape
        if r > 2:
            # Run the KNN imputer only when a numeric value is missing.
            if df_n[num_cols].isnull().sum().sum() > 0:
                df_n[num_cols] = pd.DataFrame(fancyimpute.KNN(k=5).complete(
                    df_n[num_cols]),
                                              columns=num_cols)
            # for i in num_cols:
            #     if len(df_n[df_n[i].isnull()])>0:
            #         df_n.loc[df_n[i].isnull(),i]=np.mean(df_n[i])
            for i in cat_cols:
                if len(df_n[df_n[i].isnull()]) > 0:
                    # NOTE(review): `stat` is presumably the stdlib
                    # `statistics` module; its private `_counts` helper is
                    # not available on Python 3.8+ - confirm.
                    if len(stat._counts(df_n.loc[:, i])) > 1:
                        # Several equally common values: forward-fill.
                        df_n.loc[:, i] = df_n.loc[:, i].fillna(method='ffill')
                        df_n[i] = df_n[i].astype(object)
                    else:
                        # A single most common value: use it for the gaps.
                        df_n.loc[df_n[i].isnull(), i] = stat.mode(df_n[i])
        df_full = pd.concat([df_full, df_n], ignore_index=True)
    return df_full
def mode_making(variable):
    """Return the most frequent value of ``variable`` and how often it occurs.

    On a tie the value that appears first in ``variable`` wins. The
    ``(value, count)`` pair is passed through ``pl.array`` exactly as before,
    so both results are elements of that array and share its dtype.

    Args:
        variable: iterable of hashable values.

    Returns:
        A ``(mode, quantity)`` tuple.

    Raises:
        IndexError: if ``variable`` is empty.
    """
    from collections import Counter

    # Counter.most_common replaces the private ``st._counts`` helper, which
    # newer Python versions no longer provide. It keeps first-seen order
    # among equally common values, matching the old table's first row.
    top_pair = Counter(variable).most_common(1)[0]
    as_array = pl.array(top_pair)
    return as_array[0], as_array[1]
def generate_csv(size, iterations, timeout, data):
    """Write per-heuristic run statistics to a CSV report.

    The report is written to
    ``./stats/{size}Puzzle_{iterations}Iterations.csv``; the ``./stats``
    directory must already exist.

    Args:
        size: puzzle size, echoed in the title line and the file name.
        iterations: runs per heuristic; ``iterations`` minus the number of
            successful runs is reported in the Timed_Up column.
        timeout: timeout in seconds, only echoed in the title line.
        data: mapping of algorithm name to an iterable of heuristic records.
            Each record is indexed as ``[0]`` heuristic label, ``[1]``
            solutions (their ``len()`` is reported) and ``[2]`` run times,
            where ``-1`` marks an unsuccessful run.
    """
    puzzle_info = 'NPuzzle Problem,size = {}, iterations = {}, timeout = {}s\n\n'.format(
        size, iterations, timeout)
    csv_header = 'Algorithm,Heuristic,,Length_Min,Length_Max,Length_Mean,Length_Median,Length_Modes,,Time_Min,Time_Max,Time_Mean,Time_Median,Time_Modes,,Success,Timed_Up\n'
    row_template = ',{},,{},{},{},{},"{}",,{},{},{},{},"{}",,{},{}\n'

    # The context manager closes the file even when a row fails to format.
    with open('./stats/{}Puzzle_{}Iterations.csv'.format(size, iterations),
              'w') as csv_file:
        csv_file.write(puzzle_info + csv_header)
        for algorithm, algorithm_heuristics in data.items():
            csv_file.write('{}'.format(algorithm))
            for heuristic_data in algorithm_heuristics:
                solutions = heuristic_data[1]
                run_times = heuristic_data[2]
                success_indexes = [
                    i for i, run_time in enumerate(run_times)
                    if run_time != -1
                ]
                # NOTE(review): index_of() is defined elsewhere; this
                # value-based lookup is kept unchanged. It presumably finds
                # the first equal element, so duplicated values share one
                # index - confirm that is intended.
                lengths = [
                    len(s) for s in solutions
                    if index_of(success_indexes, index_of(solutions, s)) > -1
                ]
                times = [
                    t for t in run_times
                    if index_of(success_indexes, index_of(run_times, t)) > -1
                ]
                success_amount = len(times)
                error_amount = iterations - success_amount
                csv_file.write(
                    row_template.format(heuristic_data[0],
                                        *_csv_stat_cells(lengths),
                                        *_csv_stat_cells(times),
                                        success_amount, error_amount))


def _csv_stat_cells(values):
    """Return the (min, max, mean, median, modes) cells for one value list."""
    from collections import Counter

    if not values:
        return ('-', ) * 5
    # Counter replaces the private statistics._counts helper; dict order
    # keeps the modes in first-seen order, as the old table did.
    tally = Counter(values)
    peak = max(tally.values())
    modes = [value for value, count in tally.items() if count == peak]
    return (min(values), max(values), statistics.mean(values),
            statistics.median(values), modes)
def find_max_mode(list1):
    """Return the mode of ``list1``, breaking ties with the largest value.

    Args:
        list1: iterable of hashable, mutually comparable values.

    Returns:
        The most frequent value; when several values are equally frequent,
        the largest of them.

    Raises:
        ValueError: if ``list1`` is empty.
    """
    from collections import Counter

    # Counter replaces the private statistics._counts helper, which newer
    # Python versions no longer provide.
    frequencies = Counter(list1)
    if not frequencies:
        raise ValueError("find_max_mode() arg is an empty sequence")

    top_count = max(frequencies.values())
    return max(value for value, count in frequencies.items()
               if count == top_count)
def find_max_mode(list_data):
    """Return the most common value in ``list_data``; ties go to the maximum.

    Args:
        list_data: iterable of hashable, mutually comparable values.

    Returns:
        The value with the highest frequency, or the largest such value when
        several share that frequency.

    Raises:
        ValueError: if ``list_data`` is empty.
    """
    from collections import Counter

    # most_common() yields (value, count) pairs sorted by descending count,
    # which is the table the private statistics._counts helper used to
    # build; that helper is not available on newer Python versions.
    ranked = Counter(list_data).most_common()
    if not ranked:
        raise ValueError("find_max_mode() arg is an empty sequence")

    best_count = ranked[0][1]
    return max(value for value, count in ranked if count == best_count)
def around_module(request):
    """Yield once, then write a one-row CSV summary for the module.

    Presumably used as a module-level pytest fixture (the decorator is not
    visible here): everything before ``yield`` is setup, everything after it
    is teardown. The report is written to
    ``./stats/{request.module.__name__}-{now}.csv`` and relies on the
    module-level names ``aux_size``, ``now`` and ``iterations``.

    Args:
        request: object exposing ``request.module.__name__``.

    Side effects:
        Rebinds the module globals ``csv_info``, ``csv_header`` and
        ``csv_file`` (the latter is left closed).
    """
    from collections import Counter

    global csv_file
    global csv_info
    global csv_header

    # NOTE(review): these two lists are local to this function, so nothing
    # outside can append to them and every statistic is reported as '-'.
    # Confirm whether they were meant to be module globals.
    lengths = []
    times = []

    yield

    def summarize(values):
        """Return the min/max/mean/median/modes cells of ``values`` as strings."""
        if not values:
            return ['-'] * 5
        # Counter replaces the private statistics._counts helper, which
        # newer Python versions no longer provide.
        tally = Counter(values)
        peak = max(tally.values())
        modes = [value for value, count in tally.items() if count == peak]
        return [
            str(min(values)),
            str(max(values)),
            str(statistics.mean(values)),
            str(statistics.median(values)),
            str(modes),
        ]

    csv_info = 'NPuzzle Problem.,size = ' + str(aux_size) + '\n\n'
    csv_header = 'Algorithm,Heuristic,Iterations,,Length_Min,Length_Max,Length_Mean,Length_Median,Length_Modes,,Time_Min,Time_Max,Time_Mean,Time_Median,Time_Modes,,Timed_Up\n'
    csv_row = '{},{},{},,{},{},{},{},"{}",,{},{},{},{},"{}",,{}\n'.format(
        'Iterative Deepening Search', '-', iterations, *summarize(lengths),
        *summarize(times), iterations - len(lengths))

    # The context manager closes the report even if a write fails.
    with open(
            './stats/{}-{}.csv'.format(request.module.__name__, str(now)),
            'w') as csv_file:
        csv_file.write(csv_info)
        csv_file.write(csv_header)
        csv_file.write(csv_row)
def find_max_mode(list1):
    """Find the mode of ``list1`` and still give a result when there is a tie.

    When several values occur equally often, the largest of them is returned.

    Args:
        list1: iterable of hashable, mutually comparable values.

    Returns:
        The (largest) most frequent value.

    Raises:
        ValueError: if ``list1`` is empty.
    """
    from collections import Counter

    occurrences = Counter(list1)
    if not occurrences:
        raise ValueError("find_max_mode() arg is an empty sequence")

    # Collect every value that reaches the highest count. This replaces the
    # table from the private statistics._counts helper, which is not
    # available on newer Python versions.
    highest = max(occurrences.values())
    tied_modes = [value for value in occurrences
                  if occurrences[value] == highest]
    return max(tied_modes)
def get_cluster_class_lable(self, cluster):
    """Return the majority class label of ``cluster``.

    When several labels are equally common, the smallest one is returned.

    Args:
        cluster: iterable of hashable, mutually comparable class labels.

    Returns:
        The most frequent label, or the minimum of the tied labels.

    Raises:
        ValueError: if ``cluster`` is empty.
    """
    from collections import Counter

    # Counter replaces the private ``_counts`` helper (presumably imported
    # from ``statistics``), which newer Python versions no longer provide.
    class_counts = Counter(cluster)
    if not class_counts:
        raise ValueError("get_cluster_class_lable() arg is an empty sequence")

    majority_size = max(class_counts.values())
    return min(label for label, size in class_counts.items()
               if size == majority_size)
def find_max_mode(list1):
    """Return the mode of ``list1``; if several values tie, return the largest.

    Example: ``[1, 1, 2, 2]`` has the tied modes 1 and 2, so 2 is returned.

    Args:
        list1: iterable of hashable, mutually comparable values.

    Returns:
        The most frequent value (the maximum among tied values).

    Raises:
        ValueError: if ``list1`` is empty.
    """
    from collections import Counter

    # Build the value -> count table ourselves; the private
    # statistics._counts helper that used to supply it is not available on
    # newer Python versions.
    count_by_value = Counter(list1)
    if not count_by_value:
        raise ValueError("find_max_mode() arg is an empty sequence")

    max_count = max(count_by_value.values())
    max_mode = None
    for value, count in count_by_value.items():
        if count != max_count:
            continue
        # Keep the largest of the equally frequent values.
        if max_mode is None or value > max_mode:
            max_mode = value
    return max_mode
def find_max_mode(x: list):
    """Calculate the mode of ``x``, breaking ties by choosing the max.

    :param x: values to inspect; they must be hashable and mutually
        comparable.
    :return: the most frequent value, or the largest of the values that
        share the highest frequency.
    :raises ValueError: if ``x`` is empty.
    """
    from collections import Counter

    tally = Counter(x)
    if not tally:
        raise ValueError("find_max_mode() arg is an empty sequence")

    # Comparing (count, value) pairs ranks by frequency first and by value
    # second, which gives the max-on-tie rule without the private
    # statistics._counts helper (not available on newer Python versions).
    peak = max(tally.values())
    _, max_mode = max((count, value) for value, count in tally.items()
                      if count == peak)
    return max_mode
def find_max_mode(list1):
    """Return the mode of ``list1`` with a special rule for ties.

    * One most frequent value: that value is returned.
    * Several equally frequent values including ``0``: the largest of them
      is returned.
    * Several equally frequent values without ``0``: the tied values are
      printed between two ``********`` marker lines and ``0`` is returned.

    Args:
        list1: iterable of hashable, mutually comparable values.

    Returns:
        The selected mode, or ``0`` for a tie that does not contain ``0``.

    Raises:
        ValueError: if ``list1`` is empty.
    """
    from collections import Counter

    # Counter replaces the private statistics._counts helper, which newer
    # Python versions no longer provide. Dict order keeps the tied values in
    # first-seen order, so the printed list looks the same as before.
    tally = Counter(list1)
    if not tally:
        raise ValueError("find_max_mode() arg is an empty sequence")

    peak = max(tally.values())
    candidates = [value for value, count in tally.items() if count == peak]

    if 0 not in candidates and len(candidates) > 1:
        print('********')
        print(candidates)
        print('********')
        return 0
    return max(candidates)
def relabel(window_size): # Input parameters f1 = open("D:\Academic\data_nvspl\SRCID_LAKE017_replaced_labels.txt", 'r') f2 = open("D:\Academic\data_nvspl\SRCID_LAKE017_re_labels.txt", 'w') # window_size = 10 # Read the labels labels = [int(l.split()[0]) for l in f1] # Then the size of image will be 33 x 30 window_num = len(labels) / window_size for i1 in range(int(window_num)): list1 = labels[window_size * i1:window_size * (i1 + 1)] list_table = statistics._counts(list1) len_table = len(list_table) if len_table == 1: label = statistics.mode(list1) f2.write(str(label) + '\n') else: list2 = [] print(i1) for i2 in range(len_table): list2.append(list_table[i2][0]) label = max(list2) f2.write(str(label) + '\n') f1.close() f2.close() # print some information about new labels f3 = open("D:\Academic\data_nvspl\SRCID_LAKE017_re_labels.txt", 'r') new_labels = [int(l.split()[0]) for l in f3] print('Labels counter after re-label: ') print(collections.Counter(new_labels)) f3.close()
def _modes(d): # noinspection PyProtectedMember table = stats._counts(d) modes = [table[i][0] for i in range(len(table))] return modes
def around_module(request):
    """Reset the per-heuristic result globals, yield, then write a CSV report.

    Presumably used as a module-level pytest fixture (the decorator is not
    visible here). Before ``yield`` the eight ``*_lengths`` / ``*_times``
    module globals are reset to empty lists and the four ``*_best_result``
    globals to ``None``. After ``yield`` one CSV row per heuristic
    (Manhattan, Misplaced Tiles, Gaschnig, Max_Manhattan_Gaschnig) is
    written to ``./stats/{request.module.__name__}-{timestamp}.csv``.

    Relies on the module-level names ``aux_size`` and the four
    ``*_iterations`` counters being set by the time the teardown runs.

    Args:
        request: object exposing ``request.module.__name__``.

    Side effects:
        Rebinds the globals listed above plus ``csv_info``, ``csv_header``
        and ``csv_file`` (left closed). ``*_best_result`` values that are
        still ``None`` after the yield are replaced by ``[]``.
    """
    global csv_file, csv_info, csv_header
    global manhattan_lengths, manhattan_times
    global misplaced_lengths, misplaced_times
    global gaschnig_lengths, gaschnig_times
    global max_man_gasch_lengths, max_man_gasch_times
    global manhattan_best_result, misplaced_best_result
    global gaschnig_best_result, max_man_gasch_best_result

    manhattan_lengths = []
    manhattan_times = []
    misplaced_lengths = []
    misplaced_times = []
    gaschnig_lengths = []
    gaschnig_times = []
    max_man_gasch_lengths = []
    max_man_gasch_times = []
    manhattan_best_result = None
    misplaced_best_result = None
    gaschnig_best_result = None
    max_man_gasch_best_result = None

    yield

    # `is None` instead of `== None`: identity is the intended check and it
    # cannot be hijacked by a result type that overloads `==`.
    if manhattan_best_result is None:
        manhattan_best_result = []
    if misplaced_best_result is None:
        misplaced_best_result = []
    if gaschnig_best_result is None:
        gaschnig_best_result = []
    if max_man_gasch_best_result is None:
        max_man_gasch_best_result = []

    # The globals are read here, after the yield, so lists that were
    # rebound while the tests ran are still picked up.
    # Columns: algorithm, heuristic, iterations, lengths, times, best result.
    heuristics = (
        ('Greedy Best First Search', 'Manhattan', manhattan_iterations,
         manhattan_lengths, manhattan_times, manhattan_best_result),
        ('', 'Misplaced Tiles', misplaced_iterations, misplaced_lengths,
         misplaced_times, misplaced_best_result),
        ('', 'Gaschnig', gaschnig_iterations, gaschnig_lengths,
         gaschnig_times, gaschnig_best_result),
        ('', 'Max_Manhattan_Gaschnig', max_man_gasch_iterations,
         max_man_gasch_lengths, max_man_gasch_times,
         max_man_gasch_best_result),
    )

    # Each row gets its own modes. Previously all four heuristics wrote to
    # the same two "modes" variables, so every row reported the modes of the
    # last heuristic.
    row_template = '{},{},{},,{},{},{},{},"{}",,{},{},{},{},"{}",,{},,"{}"\n'
    csv_rows = [
        row_template.format(algorithm, heuristic, run_count,
                            *_heuristic_stat_cells(lengths),
                            *_heuristic_stat_cells(times),
                            run_count - len(lengths), best_result)
        for algorithm, heuristic, run_count, lengths, times, best_result
        in heuristics
    ]

    csv_info = 'NPuzzle Problem.,size = ' + str(aux_size) + '\n\n'
    csv_header = 'Algorithm,Heuristic,Iterations,,Length_Min,Length_Max,Length_Mean,Length_Median,Length_Modes,,Time_Min,Time_Max,Time_Mean,Time_Median,Time_Modes,,Timed_Up,,Best_Result\n'
    now = datetime.now()

    # The context manager closes the report even if a write fails.
    with open(
            './stats/{}-{}.csv'.format(request.module.__name__, str(now)),
            'w') as csv_file:
        csv_file.write(csv_info)
        csv_file.write(csv_header)
        for csv_row in csv_rows:
            csv_file.write(csv_row)


def _heuristic_stat_cells(values):
    """Return the min/max/mean/median/modes cells of ``values`` as strings."""
    from collections import Counter

    if not values:
        return ['-'] * 5
    # Counter replaces the private statistics._counts helper, which newer
    # Python versions no longer provide; dict order keeps the modes in
    # first-seen order.
    tally = Counter(values)
    peak = max(tally.values())
    modes = [value for value, count in tally.items() if count == peak]
    return [
        str(min(values)),
        str(max(values)),
        str(statistics.mean(values)),
        str(statistics.median(values)),
        str(modes),
    ]