def stats(self):
    gval = GroupEvaluation(np.unique(self.alloc), self.alloc.shape[1], self.foreigners, self.alphas)
    errors = gval.error_components(self.alloc)
    group_sizes = gval.counts.transpose().tolist()

    stats = {
        'error_size': round(errors[0], 2),
        'error_meetings': round(errors[1], 2),
        'error': round(gval.error_total(self.alloc), 2),
        'groups': {
            'sizes': group_sizes,
            'sizes_mean': np.round(np.mean(gval.counts), 2)
        }
    }

    if len(errors) == 3:
        stats['error_foreigners'] = round(errors[2], 2)

    if 'First Name' in self.df and 'Family Name' in self.df:
        names = (self.df['First Name'] + ' ' + self.df['Family Name']).tolist()
    elif 'Name' in self.df:
        names = self.df['Name'].tolist()
    else:
        names = [f'Student {i}' for i in range(len(self.df))]

    meets_others = [len(others) for others in gval.others]
    member_stats = {
        'names': names,
        'meets_others': meets_others,
        'meets_others_mean': np.round(np.mean(meets_others), 2)
    }

    if self.foreigners is not None:
        member_stats['foreigners'] = self.foreigners.tolist()
        member_stats['meets_non-foreigners'] = []
        member_stats['meets_foreigners'] = []

        for others in gval.others:
            _, counts = np.unique(self.foreigners[list(others)], return_counts=True)
            member_stats['meets_non-foreigners'].append(counts[0])
            member_stats['meets_foreigners'].append(counts[1])

        _, counts = np.unique(self.foreigners, return_counts=True)
        stats['n_non-foreigners'] = counts[0]
        stats['n_foreigners'] = counts[1]

    stats['members'] = member_stats

    return stats
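# Usage sketch (assumption, not part of the original code): `allocation` is a hypothetical name for
# an instance of the enclosing class, i.e. an object that already provides self.alloc, self.df,
# self.foreigners and self.alphas. pprint is used because the dictionary contains numpy scalars
# which json.dumps cannot serialize directly.
from pprint import pprint

pprint(allocation.stats())  # Prints the nested statistics dictionary built above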
def test_errors(self):
    alloc = np.array([
        [1, 2, 4, 2, 3, 0, 3, 0, 3, 4],
        [4, 0, 0, 3, 2, 4, 2, 4, 1, 1],
        [0, 4, 3, 1, 0, 2, 0, 2, 0, 2],
        [3, 3, 2, 0, 1, 1, 1, 3, 4, 3],
        [2, 1, 1, 4, 4, 3, 4, 1, 2, 0]])
    foreigners = np.array([0, 0, 1, 1, 0, 1, 1, 1, 0, 1])
    self.assertEqual(alloc.shape[1], len(foreigners))

    gval = GroupEvaluation(np.unique(alloc), alloc.shape[1], foreigners)
    self.assertEqual(gval.error_group_sizes(alloc), 0.9)
    self.assertEqual(gval.error_meetings(alloc), 0.5429272594305719)
    self.assertEqual(gval.error_foreigners(alloc), 1.0645929264282177)
    self.assertEqual(gval.error_total(alloc), 0.8358400619529298)
import numpy as np

from group_allocation_python.GroupEvaluation import GroupEvaluation

# Groups are index-based
alloc = np.array([[2, 2, 0, 2, 2, 1],
                  [1, 0, 2, 0, 1, 0],
                  [0, 1, 1, 1, 0, 2]])
gval = GroupEvaluation(np.unique(alloc), alloc.shape[1], np.array([1, 1, 1, 0, 0, 0]))

print(gval.error_group_sizes(alloc))
print(gval.error_meetings(alloc))
print(gval.error_foreigners(alloc))
import itertools
import math
from multiprocessing import Pool, Value, cpu_count

import numpy as np

from group_allocation_python.GroupEvaluation import GroupEvaluation


class GroupSearch:
    def __init__(self, n_groups: int, n_users: int, foreigners=None, alphas=None):
        assert n_groups <= 8, 'The number of groups should not be too high as otherwise the algorithm takes too long.'

        self.groups = np.arange(n_groups)  # The groups are index-based since this simplifies the implementation (counting)
        self.n_users = n_users
        self.n_seeds = 24  # 24 might be a good choice because it has many divisors so that the number of seeds is likely to be divisible by the number of CPU cores
        self.n_iterations = self.n_users * 2
        self.gval = GroupEvaluation(self.groups, self.n_users, foreigners, alphas)

    def total_iterations(self):
        return math.factorial(len(self.groups)) * self.n_seeds * self.n_iterations

    def find_best_allocation(self):
        counter = Value('i', 0)
        seeds = range(self.n_seeds)

        if self.n_iterations * self.n_seeds < 200:
            # Sequential execution
            init_counter(counter)
            results = []
            for seed in seeds:
                results.append(self._start_random_walk(seed))
        else:
            # Parallel execution
            pool = Pool(processes=cpu_count(), initializer=init_counter, initargs=(counter,))
            results = []
            for result in pool.imap_unordered(self._start_random_walk, seeds):
                results.append(result)
            pool.close()
            pool.join()

        best_error = np.inf
        best_combs = None
        for result in results:
            error, last_improvement, combs = result
            if error < best_error:
                best_error = error
                best_combs = combs

        return self.gval.add_last_comb(best_combs)

    def _inc(self):
        global counter
        with counter.get_lock():
            counter.value += 1
            print(counter.value / (self.n_iterations * self.n_seeds))

    def _start_random_walk(self, seed):
        # General note: since the last day is implicitly defined by the previous days (no degree of freedom left), its combination is excluded in the following
        if seed == 0:
            # The randomly initialized starting configuration may lead to degenerate results in extreme settings like 9 users and 6 groups. In such settings, it is possible that some groups don't get any users at all. Since this is something we want to avoid, we manually add an evenly distributed starting configuration so that there is at least one configuration which fills each group
            # The goal is to produce something like the following (4 users and 3 groups):
            # day1: 0 0 1 2
            # day2: 1 1 2 0
            first_day = np.sort(np.resize(self.groups, self.n_users))

            days = [first_day]
            for group in range(1, len(self.groups) - 1):
                prev_day = days[group - 1]
                next_day = (prev_day + 1) % len(self.groups)
                days.append(next_day)

            days = np.asarray(days)
        else:
            np.random.seed(seed)

            # Random combination for all users (columns) and all days (rows)
            days = np.stack([np.random.choice(self.groups, size=len(self.groups) - 1, replace=False) for _ in range(self.n_users)]).transpose()

        error = self.gval.error_total(days)
        last_improvement = -1

        for i in range(self.n_iterations):
            idx_user = np.random.randint(0, self.n_users)

            # For the selected user, iterate over every possible group assignment
            for new_slots in itertools.permutations(self.groups, len(self.groups) - 1):
                # Temporarily assign the user to new groups (for all days)
                days_copy = days.copy()
                days_copy[:, idx_user] = new_slots

                error_new = self.gval.error_total(days_copy)
                if error_new < error:
                    error = error_new
                    days = days_copy
                    last_improvement = i

            # print(last_improvement, error)
            self._inc()

        return error, last_improvement, days
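# The class above relies on a module-level init_counter helper that is not shown here. A minimal
# sketch, assuming it only has to publish the shared Value as a module global (both for the
# sequential path and as the Pool initializer), could look like this:
def init_counter(shared_counter):
    global counter
    counter = shared_counter  # Make the shared progress counter visible to _inc() in every worker process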
import itertools
from multiprocessing import Pool, cpu_count

import numpy as np

from group_allocation_python.GroupEvaluation import GroupEvaluation
from misc.MeasureTime import MeasureTime


def run(combs):
    combs = np.array(combs).transpose()
    return sum(gval.error_components(combs))


n_students = 8
groups = [0, 1, 2]
# foreigners = np.array([0, 0, 1, 1, 0, 0], np.int32)  #, 0, 1, 1, 1, 0, 1
# assert len(foreigners) == n_students
gval = GroupEvaluation(groups, n_students)

if __name__ == "__main__":
    # Test every possible combination (works only for small problems)
    time_per_comb = 23.27 / 279936
    comb_student = list(itertools.permutations(groups, len(groups) - 1))

    n_combs = len(comb_student) ** n_students
    print(f'Number of combinations to check: {n_combs}')
    print(f'Estimated time: {time_per_comb * n_combs} s')

    with MeasureTime():
        pool = Pool(cpu_count())
        errors = pool.map(run, itertools.product(comb_student, repeat=n_students))
        pool.close()
        pool.join()
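    # Possible follow-up (assumption, not part of the original script): select the combination with
    # the smallest total error. The product iterator was consumed by pool.map above, so it is
    # recreated here and advanced to the best index via islice.
    best_idx = int(np.argmin(errors))
    best_comb = next(itertools.islice(itertools.product(comb_student, repeat=n_students), best_idx, None))
    best_alloc = np.array(best_comb).transpose()  # Same days-by-students layout as in run()
    print(f'Smallest total error: {errors[best_idx]}')
    print(best_alloc)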
import numpy as np

from group_allocation_python.GroupEvaluation import GroupEvaluation
from group_allocation_python.GroupSearch import GroupSearch

if __name__ == '__main__':
    n_users = 4
    n_groups = 3

    group_search = GroupSearch(n_groups, n_users)
    alloc = group_search.find_best_allocation()

    gval = GroupEvaluation(np.unique(alloc), alloc.shape[1])
    error = sum(gval.error_components(alloc))

    meets = [len(others) for others in gval.others]
    days = [np.unique(day, return_counts=True)[1] for day in alloc]

    print(meets)
    print(days)
    print(error)

# if __name__ == '__main__':
#     df = pd.read_csv('example_data.csv')
#     foreigners = df['Foreigner'].to_numpy().astype(np.int32)
#     alphas = [0.17, 0.77, 0.06]
#     n_groups = 3
#
#     group_search = GroupSearch(n_groups, len(df), foreigners, alphas)
#     alloc = group_search.find_best_allocation()
import numpy as np

from group_allocation_python.GroupEvaluation import GroupEvaluation
from misc.MeasureTime import MeasureTime

# if __name__ == '__main__':
#     with MeasureTime():
#         group_search = GroupSearch(5, 10, np.array([0, 0, 1, 1, 0, 1, 1, 1, 0, 1]))
#         alloc = group_search.find_best_allocation()
#         print(alloc)

alloc = np.array([[1, 2, 4, 2, 3, 0, 3, 0, 3, 4],
                  [4, 0, 0, 3, 2, 4, 2, 4, 1, 1],
                  [0, 4, 3, 1, 0, 2, 0, 2, 0, 2],
                  [3, 3, 2, 0, 1, 1, 1, 3, 4, 3],
                  [2, 1, 1, 4, 4, 3, 4, 1, 2, 0]])

# Opt
# alloc = np.array([
#     [2, 0, 4, 1, 2, 1, 4, 3, 0, 3],
#     [4, 1, 3, 4, 3, 0, 1, 2, 2, 0],
#     [1, 2, 0, 3, 4, 2, 0, 4, 3, 1],
#     [3, 3, 1, 0, 0, 4, 2, 1, 4, 2],
#     [0, 4, 2, 2, 1, 3, 3, 0, 1, 4]])

foreigners = np.array([0, 0, 1, 1, 0, 1, 1, 1, 0, 1])
gval = GroupEvaluation(np.unique(alloc), alloc.shape[1], foreigners)

with MeasureTime():
    for _ in range(200000):
        # gval.error_foreigners(alloc)
        # gval.error_meetings(alloc)
        gval.error_group_sizes(alloc)