def main():
    """Solve the grouped worker/task assignment described in ``data.json``.

    The JSON document is read for three keys: ``efficiency`` (a matrix),
    ``group_sizes`` and ``work_sizes``.  Every efficiency row is repeated
    according to ``group_sizes`` and every column according to
    ``work_sizes``; the values are negated so that the minimum-cost solver
    maximises total efficiency.  Results are printed to stdout.
    """
    with open('data.json') as infile:
        data = json.load(infile)

    costs = -np.array(data['efficiency']).repeat(
        data['group_sizes'], axis=0).repeat(data['work_sizes'], axis=1)

    assignment = pywrapgraph.LinearSumAssignment()
    for (i, j), cost in np.ndenumerate(costs):
        assignment.AddArcWithCost(i, j, int(cost))

    solve_status = assignment.Solve()
    if solve_status == assignment.OPTIMAL:
        print(f'Total efficiency: {-assignment.OptimalCost()}')
        print()
        # Index of the last expanded row/column of every group.  These do
        # not depend on the loop variable, so compute them once.
        worker_group_ends = np.cumsum(data['group_sizes']) - 1
        work_group_ends = np.cumsum(data['work_sizes']) - 1
        for i in range(assignment.NumNodes()):
            task = assignment.RightMate(i)
            efficiency = -assignment.AssignmentCost(i)
            # Map the expanded indices back to their group numbers.
            worker_group = bisect_left(worker_group_ends, i)
            work_group = bisect_left(work_group_ends, task)
            print(
                f'Worker from group {worker_group} assigned to task group {work_group}, efficiency: {efficiency}'
            )
    elif solve_status == assignment.INFEASIBLE:
        print('Assignment is impossible')
    elif solve_status == assignment.POSSIBLE_OVERFLOW:
        print('Possible overflow')
def main():
    """Solve the assignment problem for ``create_data_array()`` and print it.

    Matrix entries equal to ``'NA'`` are not added as arcs.  The solver
    object is returned whatever the solve status is.
    """
    cost = create_data_array()
    n_workers, n_tasks = len(cost), len(cost[0])

    solver = pywrapgraph.LinearSumAssignment()
    for w in range(n_workers):
        for t in range(n_tasks):
            value = cost[w][t]
            if value != 'NA':
                solver.AddArcWithCost(w, t, value)

    status = solver.Solve()
    if status == solver.INFEASIBLE:
        print('No assignment is possible.')
    elif status == solver.POSSIBLE_OVERFLOW:
        print(
            'Some input costs are too large and may cause an integer overflow.'
        )
    elif status == solver.OPTIMAL:
        print('Total cost = ', solver.OptimalCost())
        print()
        for node in range(solver.NumNodes()):
            mate = solver.RightMate(node)
            arc_cost = solver.AssignmentCost(node)
            print('Worker %d assigned to task %d. Cost = %d' %
                  (node, mate, arc_cost))
    return solver
def RunAssignmentOn4x4Matrix():
    """Run the linear sum assignment solver on a fixed 4x4 cost matrix.

    Prints the solver's optimal cost next to the cost of the hand-picked
    matching (0->3, 1->2, 2->1, 3->0), followed by each pairing.
    """
    cost = [[90, 76, 75, 80],
            [35, 85, 55, 65],
            [125, 95, 90, 105],
            [45, 110, 95, 115]]
    expected_cost = sum(cost[src][3 - src] for src in range(4))

    solver = pywrapgraph.LinearSumAssignment()
    for source, row in enumerate(cost):
        for target, arc_cost in enumerate(row):
            solver.AddArcWithCost(source, target, arc_cost)

    status = solver.Solve()
    if status == solver.INFEASIBLE:
        print('No perfect matching exists.')
        return
    if status == solver.POSSIBLE_OVERFLOW:
        print('Some input costs are too large and may cause an integer overflow.')
        return
    if status != solver.OPTIMAL:
        return

    print('Successful solve.')
    print('Total cost', solver.OptimalCost(), '/', expected_cost)
    for left in range(solver.NumNodes()):
        print('Left node %d assigned to right node %d with cost %d.' % (
            left, solver.RightMate(left), solver.AssignmentCost(left)))
def solve(jobs, names, costs, quadratic=True):
    """Assign each entry of ``names`` to one entry of ``jobs``.

    When ``quadratic`` is true the costs are squared before solving and the
    reported per-assignment costs are the square roots of the solver's arc
    costs.  Falsy cost entries are not added as arcs.

    Returns ``(total, assignments)`` where each assignment is
    ``[name, job, cost]``.  When the solver reports INFEASIBLE or
    POSSIBLE_OVERFLOW a message is printed and ``None`` is returned.
    """
    if quadratic:
        costs = [[value**2 for value in row] for row in costs]

    n_rows = len(costs)
    n_cols = len(costs[0])
    solver = pywrapgraph.LinearSumAssignment()
    for worker in range(n_rows):
        for task in range(n_cols):
            weight = costs[worker][task]
            if weight:
                solver.AddArcWithCost(worker, task, weight)

    status = solver.Solve()
    if status == solver.INFEASIBLE:
        print('No assignment is possible.')
        return None
    if status == solver.POSSIBLE_OVERFLOW:
        print(
            'Some input costs are too large and may cause an integer overflow.'
        )
        return None
    if status != solver.OPTIMAL:
        return None

    assignments = [
        [names[node], jobs[solver.RightMate(node)], solver.AssignmentCost(node)]
        for node in range(solver.NumNodes())
    ]
    if not quadratic:
        return solver.OptimalCost(), assignments

    # Undo the squaring so callers see costs on the original scale.
    total = 0
    for entry in assignments:
        entry[2] = math.sqrt(entry[2])
        total += entry[2]
    return total, assignments
def solve(self):
    """Match every user in ``self.users`` to one topic index.

    Returns a dict mapping each user name to the column (topic) index the
    solver paired it with.  Asserts that there are no more students than
    topics and that the solver reports an optimal solution.
    """
    # One row and one column per topic: the cost matrix is size x size.
    size = len(self.topics)

    students = []
    for name, prefs in self.users.items():
        students.append((name, ranks_to_weights(size, prefs)))

    # Randomise the order so input ordering gives nobody an advantage.
    random.shuffle(students)

    # Pad with preference-free placeholders until the matrix is square.
    dummies = max(0, size - len(students))
    students.extend((None, {}) for _ in range(dummies))
    assert len(students) == size, "More students than topics"

    # Bipartite graph: students on one side, presentations on the other.
    graph = pywrapgraph.LinearSumAssignment()
    for row, (name, weights) in enumerate(students):
        for col in range(size):
            # The solver minimises, so weights are negated.
            graph.AddArcWithCost(row, col, -weights.get(col, 0))

    status = graph.Solve()
    assert status == graph.OPTIMAL, 'Not all students could be assigned to a presentation'

    sol = {}
    for i, (name, _) in enumerate(students):
        if name is not None:
            sol[name] = graph.RightMate(i)
    return sol
def main(cost):
    """Solve the assignment problem for ``cost`` and report timings.

    Falsy entries of ``cost`` are not added as arcs; the others are cast to
    ``int``.  Build and solve times are printed, and a line with
    ``len(cost)`` and an elapsed time is written to the module-level ``f``.
    Returns the optimal cost, or 0 when no optimal solution was found.
    """
    # 1: create the solver
    n_rows = len(cost)
    n_cols = len(cost[0])
    solver = pywrapgraph.LinearSumAssignment()

    # 2: add costs to the solver
    start = time.time()
    for worker in range(n_rows):
        for task in range(n_cols):
            entry = cost[worker][task]
            if entry:
                solver.AddArcWithCost(worker, task, int(entry))
    print('2. time for initializing the linear assignment solver = {:.6f}'.
          format(time.time() - start))

    # 3: invoke the solver
    opt = 0
    start = time.time()
    status = solver.Solve()
    print('3. time for the algorithm = {:.6f}'.format(time.time() - start))
    f.write(f'{len(cost)} {time.time() - start}\n')

    if status == solver.INFEASIBLE:
        print('No assignment is possible')
    elif status == solver.POSSIBLE_OVERFLOW:
        print(
            'Some inputs costs are too large and may cause an integer overflow'
        )
    elif status == solver.OPTIMAL:
        opt = solver.OptimalCost()
        print('Total cost = ', opt, '\n')
        # Per-worker output is intentionally disabled:
        # for i in range(solver.NumNodes()):
        #     print('worker {} assigned to task {}. cost = {}'.format(
        #         i, solver.RightMate(i), solver.AssignmentCost(i)))
    return opt
def lsa_solve_ortools(costs):
    """Solve the linear sum assignment problem with Google OR-Tools.

    Non-square inputs, and inputs for which OR-Tools does not reach an
    optimal status, are delegated to ``linear_sum_assignment`` with the
    scipy solver.  Only finite entries of ``costs`` become arcs; they are
    scaled and rounded to integers first.
    """
    from ortools.graph import pywrapgraph

    if costs.shape[0] != costs.shape[1]:
        # OR-Tools assumes a square problem; a non-square one would be
        # infeasible.  Fall back to scipy instead of padding with zeros,
        # which keeps the behaviour of previous versions.
        return linear_sum_assignment(costs, solver='scipy')

    finite_mask = np.isfinite(costs)
    rs, cs = finite_mask.nonzero()  # pylint: disable=unbalanced-tuple-unpacking
    finite_costs = costs[rs, cs]

    scale = find_scale_for_integer_approximation(finite_costs)
    if scale != 1:
        warnings.warn('costs are not integers; using approximation')
    int_costs = np.round(scale * finite_costs).astype(int)

    assignment = pywrapgraph.LinearSumAssignment()
    # Plain Python ints are passed because OR-Tools does not like np.int64.
    arcs = zip(
        rs.tolist(),  # pylint: disable=no-member
        cs.tolist(),
        int_costs.tolist(),
    )
    for row, col, weight in arcs:
        assignment.AddArcWithCost(row, col, weight)

    status = assignment.Solve()
    try:
        _ortools_assert_is_optimal(pywrapgraph, status)
    except AssertionError:
        # Fall back to scipy rather than adding finite edges, which keeps
        # the behaviour of previous versions.
        return linear_sum_assignment(costs, solver='scipy')
    else:
        return _ortools_extract_solution(assignment)
def compute_state_space_distance(model_states, data_states, diff_threshold=None):
    """
    Quantify the difference between model states and data states; each
    state is a binary vector.

    :param model_states: array-like, each row is a state
    :param data_states: array-like, each row is a state
    :param diff_threshold: if the numbers of states differ by at least this
        threshold, the exact distance is not computed; instead
        abs(#data_states - #model_states) * #genes * 2 is returned.
        If None, the shortcut is disabled.
    :return: a non-negative scalar representing the space
        distance/dissimilarity

    The matching of the two state spaces is modelled as an assignment
    problem: minimum weight perfect matching in a weighted bipartite graph.
    If one space A has more states, the other space B is complemented with
    virtual states whose distance from each state is identical, defined to
    be the number of genes, i.e., the number of columns in model_states and
    data_states.
    """
    if not isinstance(data_states, np.ndarray):
        data_states = np.array(list(data_states))
    assert data_states.ndim == 2, 'The data_states must be of 2 dimension.'

    if diff_threshold is not None:
        n_genes = data_states.shape[1]
        model_states = model_states.reshape((-1, n_genes))
        count_gap = abs(model_states.shape[0] - data_states.shape[0])
        if count_gap >= diff_threshold:
            return count_gap * n_genes * 2

    distances = _compute_hamming_distance(model_states, data_states)

    # Linear assignment solver over the (square) distance matrix.
    solver = pywrapgraph.LinearSumAssignment()
    size = distances.shape[0]
    for row in range(size):
        for col in range(size):
            solver.AddArcWithCost(row, col, int(distances[row, col]))

    if solver.Solve() != solver.OPTIMAL:
        raise RuntimeError('Failed to solve the assignment problem!')
    return solver.OptimalCost()
def best_permutation(A, B):
    """Return the optimal matching of rows of ``A`` to rows of ``B``.

    The cost of pairing ``a`` with ``b`` is ``dist(A, B)[a, b]`` squared
    and rounded to the nearest integer.

    :return: a list whose ``a``-th entry is the right-hand node matched to
        left-hand node ``a``.
    :raises RuntimeError: if the solver does not report an optimal
        solution.  Mates are only meaningful after a successful solve, so
        they are not read in that case.
    """
    costs = np.round(np.square(dist(A, B)))
    n_left, n_right = costs.shape[0], costs.shape[1]

    assignment = pywrapgraph.LinearSumAssignment()
    for a in range(n_left):
        for b in range(n_right):
            assignment.AddArcWithCost(a, b, int(costs[a, b]))

    if assignment.Solve() != assignment.OPTIMAL:
        raise RuntimeError('Failed to solve the assignment problem!')
    return [assignment.RightMate(a) for a in range(n_left)]
def lsa_solve_ortools(costs):
    """Solves the LSA problem using Google's optimization tools.

    Only finite entries of ``costs`` are added as arcs.  If OR-Tools does
    not reach an optimal status the problem is handed to
    ``linear_sum_assignment`` with the scipy solver.

    :return: a pair of int64 arrays ``(row_indices, col_indices)``.
    """
    from ortools.graph import pywrapgraph

    # Google OR tools only support integer costs. Here's our attempt
    # to convert from floating point to integer:
    #
    # We search for the minimum difference between any two costs and
    # compute the first non-zero digit after the decimal place. Then
    # we compute a factor, f, that scales all costs so that the difference
    # is integer representable in the first digit.
    #
    # Example: min-diff is 0.001, then first non-zero digit place -3, so
    # we scale by 1e3.
    #
    # For small min-diffs and large costs in general there is a chance of
    # overflowing.
    valid = np.isfinite(costs)

    min_e = -8
    unique = np.unique(costs[valid])

    if unique.shape[0] == 1:
        min_diff = unique[0]
    elif unique.shape[0] > 1:
        min_diff = np.diff(unique).min()
    else:
        min_diff = 1

    min_diff_e = 0
    if min_diff != 0.0:
        min_diff_e = int(np.log10(np.abs(min_diff)))
        if min_diff_e < 0:
            min_diff_e -= 1

    e = min(max(min_e, min_diff_e), 0)
    f = 10**abs(e)

    assignment = pywrapgraph.LinearSumAssignment()
    for r in range(costs.shape[0]):
        for c in range(costs.shape[1]):
            if valid[r, c]:
                # Round instead of truncating: binary floating point turns
                # e.g. 0.29 * 100 into 28.999..., which int() alone would
                # map to 28 and so distort the scaled cost.
                assignment.AddArcWithCost(r, c, int(round(costs[r, c] * f)))

    if assignment.Solve() != assignment.OPTIMAL:
        return linear_sum_assignment(costs, solver='scipy')

    if assignment.NumNodes() == 0:
        return np.array([], dtype=np.int64), np.array([], dtype=np.int64)

    pairings = [[i, assignment.RightMate(i)]
                for i in range(assignment.NumNodes())]
    indices = np.array(pairings, dtype=np.int64)
    return indices[:, 0], indices[:, 1]
def main(argv):
    """Assign students to course places and write out the result.

    Courses and students come from ``loader``; the cost matrix comes from
    ``create_costs``.  Entries equal to ``'NA'`` are not added as arcs.  On
    an optimal solve the courses and students are written via ``writer``;
    otherwise an error is logged.
    """
    init_globals(argv)

    # Courses, ordered by title.
    courses = sorted(loader.load_courses(), key=lambda c: c.title)

    # Populate the 'nodes' property of every course.
    node_count = set_course_nodes(courses)

    # The more places a course offers, the cheaper it is.
    largest = max(c.max_students for c in courses)
    for c in courses:
        c.cost = largest - c.max_students + 1

    # Students, shuffled three times.
    students = loader.load_students()
    for _ in range(3):
        shuffle(students)

    student_count = len(students)
    if student_count > node_count:
        logger.error('We do NOT have enough course places: %d < %d!',
                     node_count, student_count)
        sys.exit(0)

    costs = create_costs(students, courses, node_count)
    n_students = len(costs)
    # Number of course places rather than number of courses.
    n_places = len(costs[0])
    logger.info(
        'The cost matrix has %dx%d dimension (students x course places).',
        n_students, n_places)

    solver = pywrapgraph.LinearSumAssignment()
    for s in range(n_students):
        for p in range(n_places):
            weight = costs[s][p]
            # 'NA' marks a course the student is NOT interested in.
            if weight != 'NA':
                solver.AddArcWithCost(s, p, weight)

    status = solver.Solve()
    if status == solver.INFEASIBLE:
        logger.error('No assignment is possible.')
        return
    if status == solver.POSSIBLE_OVERFLOW:
        logger.error(
            'Some input costs are too large and may cause an integer overflow.'
        )
        return
    if status != solver.OPTIMAL:
        return

    logger.info('Total optimal cost is %d.', solver.OptimalCost())
    for node in range(solver.NumNodes()):
        crse = get_course_by_node_id(courses, solver.RightMate(node))
        std = get_student(students, node)
        # No student for this node means a ghost entry; nothing to record.
        if std is None:
            continue
        crse.add_student(std)
        # Only the course ID is stored on the student.
        std.course = crse.id
        std.cost = solver.AssignmentCost(node)

    writer.write_courses(courses)
    # Students are sorted before being written out.
    students = utility.sort_students(students)
    writer.write_students(students, courses)
    writer.close()
def main():
    """Solve the grouped assignment problem stored in ``lab2data.txt``.

    The first line holds the worker group sizes, the second the task group
    sizes, and the following lines one row of values per worker group.  The
    group matrix is expanded to one row per worker and one column per task,
    negated for the minimising solver, and the result is printed.
    """
    with open('lab2data.txt') as file:
        worker_groups = [int(tok) for tok in file.readline().split(' ')]
        task_groups = [int(tok) for tok in file.readline().split(' ')]
        cost = [[int(tok) for tok in file.readline().split(' ')]
                for _ in range(len(worker_groups))]

    worker_num = sum(worker_groups)
    task_num = sum(task_groups)
    efficiency = [[0] * task_num for _ in range(worker_num)]
    workers_and_tasks = [[(0, 0)] * task_num for _ in range(worker_num)]

    # Fill each (worker group, task group) rectangle of the expanded matrix.
    row_start = 0
    for i, worker_group in enumerate(worker_groups):
        col_start = 0
        for j, task_group in enumerate(task_groups):
            for r in range(row_start, row_start + worker_group):
                for c in range(col_start, col_start + task_group):
                    efficiency[r][c] = -cost[i][j]
                    workers_and_tasks[r][c] = (i + 1, j + 1)
            col_start += task_group
        row_start += worker_group

    solver = pywrapgraph.LinearSumAssignment()
    for worker in range(worker_num):
        for task in range(task_num):
            weight = efficiency[worker][task]
            if weight:
                solver.AddArcWithCost(worker, task, weight)

    status = solver.Solve()
    if status == solver.INFEASIBLE:
        print('No assignment is possible.')
    elif status == solver.POSSIBLE_OVERFLOW:
        print(
            'Some input costs are too large and may cause an integer overflow.'
        )
    elif status == solver.OPTIMAL:
        print(f'Total efficiency: {-solver.OptimalCost()}')
        print()
        for node in range(solver.NumNodes()):
            mate = solver.RightMate(node)
            gained = -solver.AssignmentCost(node)
            w, t = workers_and_tasks[node][mate]
            print(
                f'Worker from group {w} assigned to task type {t}, efficiency: {gained}'
            )
def update_flow(self):
    """Recompute ``self.guided_inds`` from ``self.adj_matrix``.

    For every matrix in ``self.adj_matrix`` an assignment problem is solved
    and the resulting right-mate indices are stored as a long tensor in
    ``self.guided_inds`` (one tensor per matrix).
    """
    print('mgd info: update flow')
    feat_num = len(self.adj_matrix)

    self.guided_inds = []

    for i in range(feat_num):
        # Stays empty if the solver does not reach an optimal status.
        _col_ind = []

        sc, tc = self.adj_matrix[i].shape

        _adj_mat = []
        if sc != tc:
            # Stack tc // sc copies of the matrix along the row axis.
            _adj_mat = np.concatenate(
                [self.adj_matrix[i] for _ in range(tc // sc)], axis=0)
        else:
            _adj_mat = self.adj_matrix[i]

        cost = _adj_mat / self.num_tracked_imgs

        start = time.time()
        assignment = pywrapgraph.LinearSumAssignment()

        rows, cols = cost.shape

        # shave
        cols = rows if self.shave else cols

        for r in range(rows):
            for c in range(cols):
                # Costs are scaled by 1e5 and truncated to integers.
                assignment.AddArcWithCost(r, c, int(1e5 * cost[r][c]))

        solve_status = assignment.Solve()
        if solve_status == assignment.OPTIMAL:
            _col_ind = [
                assignment.RightMate(n)
                for n in range(0, assignment.NumNodes())
            ]
            cost_sum = sum(
                assignment.AssignmentCost(n)
                for n in range(0, assignment.NumNodes()))

            # NOTE(review): cost_sum is only bound on the OPTIMAL path, so
            # this summary is kept inside the branch - confirm against the
            # original layout.
            print(
                'mgd info: solve assignment for stage {}\tflow matrix shape: {}\ttime: {:.5f}\tcost: {:.5f}'
                .format(i, cost.shape, time.time() - start, 1e-5 * cost_sum))

        if self.distributed:
            flow_inds = torch.from_numpy(
                np.asarray(_col_ind)).long().cuda()
            # broadcast to all gpus
            torch.distributed.broadcast(flow_inds, src=0)
        else:
            flow_inds = torch.from_numpy(np.asarray(_col_ind)).long()

        self.guided_inds.append(flow_inds)
def assignment_bench(json_dict):
    """Benchmark matching the keys of ``json_dict`` to its values.

    Preferences are computed with ``matcher.set_prefs(Graph.vertex_diff)``
    and every rating of every left and right vertex becomes an arc.

    Returns a dict with ``total_time``, ``preferences_time``, ``accuracy``
    and ``errors``; accuracy is 0 and errors empty when the solve was not
    optimal.
    """
    keys = list(json_dict)
    values = list(json_dict.values())
    matcher = Graph.BipartiteMatcher(keys, values, SuffixArray)

    time_init = time()
    matcher.set_prefs(Graph.vertex_diff)
    time_end_prefs = time()

    solver = pywrapgraph.LinearSumAssignment()
    for side in (matcher.left, matcher.right):
        for vertex in side:
            for rating in vertex.ratings:
                solver.AddArcWithCost(vertex.idx, rating[0].idx,
                                      int(rating[1]))
    status = solver.Solve()
    time_end = time()

    prefs_time = time_end_prefs - time_init
    total_time = time_end - time_init

    acc, errs = 0, []
    if status == solver.INFEASIBLE:
        print('No assignment is possible.')
    elif status == solver.POSSIBLE_OVERFLOW:
        print(
            'Some input costs are too large and may cause an integer overflow.')
    elif status == solver.OPTIMAL:
        acc, errs = accuracy_assignment(solver, matcher, json_dict)
        for i in errs:
            print('----------------------> %s\n'
                  ' got mapped to : %s\n'
                  ' but should have been: %s\n'
                  ' Cost = %d' % (
                      matcher.left[i],
                      matcher.right[solver.RightMate(i)],
                      matcher.right[i],
                      solver.AssignmentCost(i)))
        print(f'{len(errs)} wrong assignments')
        print('Accuracy:', acc)
        print('Total cost:', solver.OptimalCost())

    print(f'Preferences run time: {prefs_time} seconds'
          f' ({prefs_time / 60} minutes)')
    print(f'Total run time: {total_time} seconds'
          f' ({total_time / 60} minutes)')

    report = {
        'total_time': total_time,
        'preferences_time': prefs_time,
        'accuracy': acc,
        'errors': errs
    }
    return report
def run(costs):
    """Solve the assignment problem for ``costs`` and return the optimal cost.

    Finite entries are scaled by 1e3 and truncated to integers before being
    handed to the solver; the solver's optimal cost is scaled back by the
    same factor.  The solve status is not checked.
    """
    scale = 1e3
    finite = np.isfinite(costs)
    n_rows, n_cols = costs.shape[0], costs.shape[1]

    # Graph construction is where much of the OR-Tools time is spent.
    solver = pywrapgraph.LinearSumAssignment()
    for row in range(n_rows):
        for col in range(n_cols):
            if not finite[row, col]:
                continue
            solver.AddArcWithCost(row, col, int(costs[row, col] * scale))

    # No error checking for now
    solver.Solve()
    return solver.OptimalCost() / scale
def solve_matching_problem(cost_matrix, multiplier_for_db_print=1.0):
    """Solve the ground-truth/prediction matching for ``cost_matrix``.

    The arc from ``ground_truth`` to ``prediction`` carries
    ``cost_matrix[prediction][ground_truth]``.  After solving, the status
    is passed to ``check_status`` and the assignments to
    ``debug_print_assignments``.

    :param cost_matrix: indexable matrix of arc costs.
    :param multiplier_for_db_print: forwarded to
        ``debug_print_assignments``.
    :return: the solved ``LinearSumAssignment`` object.

    If the solver rejects a cost, the offending value is printed and the
    original exception propagates.  The former interactive ``ipdb``
    breakpoint is gone: it blocked non-interactive runs and replaced the
    real error with an ImportError when ipdb was not installed.
    """
    assignment = pywrapgraph.LinearSumAssignment()
    for ground_truth in range(len(cost_matrix)):
        for prediction in range(len(cost_matrix[0])):
            cost = cost_matrix[prediction][ground_truth]
            try:
                assignment.AddArcWithCost(ground_truth, prediction, cost)
            except Exception:
                # Show the value the solver refused, then re-raise.
                print(cost)
                raise
    check_status(assignment.Solve(), assignment)
    debug_print_assignments(assignment, multiplier_for_db_print)
    return assignment
def main():
    """Assign TAs to recitations and write the result to ``output.txt``.

    Costs come from ``createDataArray()`` and recitation names from
    ``createCourseDict()``.  Falsy cost entries are not added as arcs.  An
    applicant whose assignment cost is at or above ``THRESHOLD`` is reported
    as unassignable and 999999 is subtracted from the final printed total
    for each such applicant.
    """
    dumped_costs = 0
    cost = createDataArray()
    course_dict = createCourseDict()
    rows = len(cost)
    cols = len(cost[0])

    # The context manager closes the file even if the solver or one of the
    # lookups below raises.
    with open("output.txt", "w+") as outputFile:
        outputFile.write(
            "Hello Professor Howell! Here's your TA assignment result. \r\n")

        assignment = pywrapgraph.LinearSumAssignment()
        for ta in range(rows):
            for recitation in range(cols):
                if cost[ta][recitation]:
                    assignment.AddArcWithCost(ta, recitation,
                                              cost[ta][recitation])

        solve_status = assignment.Solve()
        if solve_status == assignment.OPTIMAL:
            print('Total cost = ', assignment.OptimalCost())
            for i in range(0, assignment.NumNodes()):
                if assignment.AssignmentCost(i) < THRESHOLD:
                    result = 'Applicant #%d is assigned to Recitation %s. Cost = %d' % (
                        i + 1, course_dict[assignment.RightMate(i)],
                        assignment.AssignmentCost(i))
                else:
                    result = 'Applicant #%d cannot be assigned to any recitation.' % (
                        i + 1)
                    dumped_costs += 999999
                print(result)
                outputFile.write(result + "\r\n")
            print()
            print('Total cost = ', (assignment.OptimalCost() - dumped_costs))
        elif solve_status == assignment.INFEASIBLE:
            print('No assignment is possible.')
        elif solve_status == assignment.POSSIBLE_OVERFLOW:
            print(
                'Some input costs are too large and may cause an integer overflow.'
            )

    print()
    print("Please check out output.txt in your current directory.")
def main():
    """Linear Sum Assignment example on a fixed 4x4 cost matrix."""
    # [START solver]
    solver = pywrapgraph.LinearSumAssignment()
    # [END solver]

    # [START data]
    costs = [
        [90, 76, 75, 70],
        [35, 85, 55, 65],
        [125, 95, 90, 105],
        [45, 110, 95, 115],
    ]
    # [END data]

    # [START constraints]
    for worker, row in enumerate(costs):
        for task, arc_cost in enumerate(row):
            if arc_cost:
                solver.AddArcWithCost(worker, task, arc_cost)
    # [END constraints]

    # [START solve]
    status = solver.Solve()
    # [END solve]

    # [START print_solution]
    if status == solver.INFEASIBLE:
        print('No assignment is possible.')
    elif status == solver.POSSIBLE_OVERFLOW:
        print(
            'Some input costs are too large and may cause an integer overflow.'
        )
    elif status == solver.OPTIMAL:
        print(f'Total cost = {solver.OptimalCost()}\n')
        for node in range(solver.NumNodes()):
            mate = solver.RightMate(node)
            arc_cost = solver.AssignmentCost(node)
            print(f'Worker {node} assigned to task {mate}.' +
                  f' Cost = {arc_cost}')
def main():
    """Solve the assignment problem described by the JSON file in argv[1].

    The file must hold a ``costs`` matrix; entries equal to ``'unknown'``
    are not added as arcs.  The input file is deleted after it has been
    read.  On an optimal solve the assignment is printed as JSON.

    :raises Exception: if the problem is infeasible or the costs may
        overflow.
    """
    file_path = sys.argv[1]
    # Close the handle explicitly before the file is removed.
    with open(file_path, 'r') as input_file:
        input_str = input_file.read()
    input_data = json.loads(input_str)
    if os.path.isfile(file_path):
        os.remove(file_path)

    costs = input_data['costs']
    rows = len(costs)
    cols = len(costs[0])

    assignment = pywrapgraph.LinearSumAssignment()
    for worker in range(rows):
        for task in range(cols):
            if costs[worker][task] != 'unknown':
                assignment.AddArcWithCost(worker, task, costs[worker][task])

    solve_status = assignment.Solve()
    if solve_status == assignment.OPTIMAL:
        assignment_result = {
            'assignment': [{
                'worker': i,
                'task': assignment.RightMate(i)
            } for i in range(assignment.NumNodes())]
        }
        print(json.dumps(assignment_result))
    elif solve_status == assignment.INFEASIBLE:
        raise Exception('No assignment is possible.')
    elif solve_status == assignment.POSSIBLE_OVERFLOW:
        raise Exception(
            'Some input costs are too large and may cause an integer overflow.'
        )
def main():
    """Solve the assignment problem for ``create_data_array()`` and print it.

    Falsy cost entries are not added as arcs.  Left nodes are reported with
    their ``friends`` entry and right nodes with their ``commanders`` entry.
    """
    cost = create_data_array()
    n_rows, n_cols = len(cost), len(cost[0])

    solver = pywrapgraph.LinearSumAssignment()
    for worker in range(n_rows):
        for task in range(n_cols):
            weight = cost[worker][task]
            if weight:
                solver.AddArcWithCost(worker, task, weight)

    status = solver.Solve()
    if status == solver.INFEASIBLE:
        print("No assignment is possible.")
    elif status == solver.POSSIBLE_OVERFLOW:
        print(
            "Some input costs are too large and may cause an integer overflow."
        )
    elif status == solver.OPTIMAL:
        print("Total cost = ", solver.OptimalCost())
        print()
        for node in range(solver.NumNodes()):
            who = friends[node]
            what = commanders[solver.RightMate(node)]
            print("Friend %s assigned to task %s. Cost = %d" %
                  (who, what, solver.AssignmentCost(node)))
def main():
    """Interactively pick and solve one of four assignment problem kinds.

    The user chooses balanced/unbalanced minimisation or maximisation; the
    matching helper supplies the cost matrix (and, for maximisation, the
    sales matrix).  Each assignment is printed, and for maximisation the
    total sale is printed as well.
    """
    print("What kind of assignment problem are you trying to solve?\nEnter:")
    problem = int(input("1: Balanced Minimization\n2: Unbalanced Minimization\n3: Balanced Maximization\n4: Unbalanced Maximization"))

    if problem == 1:
        cost = balanced_minimization()
    elif problem == 2:
        cost = unbalanced_minimization()
    elif problem == 3:
        cost, maxim_matrix = balanced_maximization()
    elif problem == 4:
        cost, maxim_matrix = unbalanced_maximization()
    else:
        print("Invalid input")
        exit(0)
    rows = len(cost)
    cols = len(cost[0])

    # Specialised solver for the assignment problem.
    solver = pywrapgraph.LinearSumAssignment()

    # Every worker/task pair becomes an arc of the bipartite graph.
    for worker in range(rows):
        for task in range(cols):
            solver.AddArcWithCost(worker, task, cost[worker][task])

    # Run the solver and report according to the status it returns.
    status = solver.Solve()

    if status == solver.INFEASIBLE:
        # No feasible solution exists.
        print('No assignment is possible.')
        return
    if status == solver.POSSIBLE_OVERFLOW:
        # Input costs are very large.
        print('Some input costs are too large and may cause an integer overflow.')
        return
    if status != solver.OPTIMAL:
        return

    print('Total cost = ', solver.OptimalCost())
    print()
    maxim_sale = 0
    if problem in (1, 2):
        for node in range(solver.NumNodes()):
            print('Worker %d assigned to task %d. Cost = %d. ' % (
                node + 1,
                solver.RightMate(node) + 1,
                solver.AssignmentCost(node)))
    if problem in (3, 4):
        for node in range(solver.NumNodes()):
            mate = solver.RightMate(node)
            print('Worker %d assigned to task %d. Cost = %d. Sale is = %d' % (
                node + 1,
                mate + 1,
                solver.AssignmentCost(node),
                maxim_matrix[node][mate]))
            maxim_sale += maxim_matrix[node][mate]
        print("\n\nMaximum Sale is: %d" % (maxim_sale))
def main(_):
    """Run one process of a distributed LightRNN training job.

    A ``ps`` process only joins its server.  A ``worker`` process builds
    the train/valid/test models, then repeats up to
    ``FLAGS.max_adjust_iters`` rounds of training followed by a word
    re-allocation step.  Only the chief (task index 0) reads data, runs
    validation/testing, saves the model and solves the re-allocation
    assignment problem with OR-Tools.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")
    # Not referenced again in this function.
    worker_num = len(worker_hosts)

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Start a server for a specific task
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    #print("I'm worker %d and my server target is, "%FLAGS.task_index, server.target)
    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":
        is_chief = FLAGS.task_index == 0
        if is_chief:
            # Create reader for reading raw data
            reader = Reader(FLAGS.data_path, FLAGS.dataset, FLAGS.vocab_size,
                            FLAGS.batch_size, FLAGS.num_steps)
            # Create data from reader
            train_path = os.path.join(FLAGS.data_path, FLAGS.dataset,
                                      "%s.train.txt" % FLAGS.dataset)
            test_path = os.path.join(FLAGS.data_path, FLAGS.dataset,
                                     "%s.test.txt" % FLAGS.dataset)
            train_valid_data, _ = reader.read_file(train_path)
            test_data, test_step = reader.read_file(test_path)

        # Create options for each model
        train_opt = Option("train")
        valid_opt = Option("valid")
        test_opt = Option("test")

        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % FLAGS.task_index,
                    cluster=cluster)):
            print("Creating %d layers of %d units." %
                  (FLAGS.num_layers, FLAGS.hidden_size))
            train_model = LightRNN(train_opt, reuse=False)
            valid_model = LightRNN(valid_opt, reuse=True)
            test_model = LightRNN(test_opt, reuse=True)

        # NOTE(review): the nesting of the two variable scopes under this
        # device block is reconstructed - confirm against the original.
        with tf.device("/job:ps/task:0"):
            with tf.variable_scope("loss_matrix"):
                # Per-word loss accumulators, later expanded to a
                # vocab_size x vocab_size matrix for the assignment step.
                loss_matrix_r = tf.get_variable(
                    "loss_matrix_r", [FLAGS.vocab_size, FLAGS.lightrnn_size],
                    initializer=tf.constant_initializer(0.0),
                    dtype=tf.float32,
                    trainable=False)
                loss_matrix_c = tf.get_variable(
                    "loss_matrix_c", [FLAGS.vocab_size, FLAGS.lightrnn_size],
                    initializer=tf.constant_initializer(0.0),
                    dtype=tf.float32,
                    trainable=False)
                loss_matrix_update_r = tf.scatter_add(
                    loss_matrix_r,
                    tf.reshape(train_model.target, [-1]),
                    train_model.output_loss_r,
                    use_locking=True)
                loss_matrix_update_c = tf.scatter_add(
                    loss_matrix_c,
                    tf.reshape(train_model.target, [-1]),
                    train_model.output_loss_c,
                    use_locking=True)
                loss_matrix_update_op = tf.group(loss_matrix_update_r,
                                                 loss_matrix_update_c)

            with tf.variable_scope("helper"):
                # Define training variables and ops
                # Shared flags and counter that the workers poll below.
                adjustion = tf.get_variable(
                    "adjustion",
                    shape=[],
                    dtype=tf.bool,
                    initializer=tf.constant_initializer(False),
                    trainable=False)
                pre_adjustion = tf.get_variable(
                    "pre_adjustion",
                    shape=[],
                    dtype=tf.bool,
                    initializer=tf.constant_initializer(False),
                    trainable=False)
                do_adjustion = adjustion.assign(True)
                do_pre_adjustion = pre_adjustion.assign(True)
                epoch = tf.get_variable("epoch",
                                        shape=[],
                                        dtype=tf.int32,
                                        initializer=tf.constant_initializer(0),
                                        trainable=False)
                increment_epoch = tf.assign_add(epoch, 1)

        helper_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope="helper")
        loss_matrix_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                             scope="loss_matrix")
        model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                       scope="model")
        model_saver = tf.train.Saver(model_vars)

        with tf.Session(server.target,
                        config=tf.ConfigProto(
                            allow_soft_placement=True,
                            log_device_placement=False)) as sess:
            # Create a FileWriter to write summaries
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())
            print("Variables initialized ...")

            if is_chief:
                # Create coordinator to control threads
                coord = tf.train.Coordinator()
                # Create start threads function
                start_threads = start_threads_func(reader, sess, coord)

            if FLAGS.restore:
                ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
                if ckpt_path:
                    model_saver.restore(
                        sess, tf.train.latest_checkpoint(FLAGS.model_dir))
                    print("Read model parameters from %s" %
                          tf.train.latest_checkpoint(FLAGS.model_dir))
                else:
                    print("model doesn't exists")

            if is_chief and FLAGS.restore_rc:
                # Load wordid2r and wordid2c to update reader
                wordid2rc_path = os.path.join(FLAGS.model_dir, "wordid2rc.pkl")
                if os.path.isfile(wordid2rc_path):
                    # NOTE(review): pickle.load executes arbitrary code from
                    # the file; only safe if model_dir is trusted.
                    with open(wordid2rc_path, 'rb') as wordid2rc_file:
                        reader.wordid2r = pickle.load(wordid2rc_file)
                        reader.wordid2c = pickle.load(wordid2rc_file)

            # Count step num for summary
            global_train_step = 0
            global_epoch = 0

            for adjust_iter in range(FLAGS.max_adjust_iters):
                print("start training with new wordid2id")
                if adjust_iter > 0:
                    # Reset the shared flags/counter and loss accumulators
                    # left over from the previous round.
                    sess.run(tf.variables_initializer(helper_vars))
                    sess.run(tf.variables_initializer(loss_matrix_vars))
                    if FLAGS.restart_after_adjustion:
                        sess.run(tf.variables_initializer(model_vars))

                if is_chief:
                    # Randomly partition data into train and valid set
                    train_data, train_step, valid_data, valid_step = split_train_valid_data(
                        train_valid_data)
                    ppl_history = []

                current_epoch = 0
                # Each pass advances current_epoch twice: once after the
                # training phase and once after the chief's phase.
                while not adjustion.eval():
                    if is_chief:
                        threads = start_threads(train_data, train_model,
                                                FLAGS.thread_num)

                    if not pre_adjustion.eval():
                        loss = 0.0
                        step = 0
                        start_time = time.time()
                        run_options = tf.RunOptions(timeout_in_ms=300)
                        # Train until the shared epoch counter moves on.
                        while current_epoch == epoch.eval():
                            try:
                                target, loss_val, _, train_summary = sess.run(
                                    [
                                        train_model.target, train_model.loss,
                                        train_model.train_op,
                                        train_model.loss_summary_op
                                    ],
                                    options=run_options)
                                loss += loss_val
                                step += 1
                                global_train_step += 1
                                if is_chief:
                                    summary_writer.add_summary(
                                        train_summary, global_train_step)
                            except tf.errors.DeadlineExceededError:
                                # The run timed out; if the chief's feeding
                                # threads have stopped, end the epoch.
                                if is_chief and coord.should_stop():
                                    coord.join(threads,
                                               stop_grace_period_secs=10)
                                    coord.clear_stop()
                                    sess.run(increment_epoch)
                                pass
                        speed = step * FLAGS.num_steps * FLAGS.batch_size // (
                            time.time() - start_time)
                        train_ppl = np.exp(loss / step)
                        print(
                            "TaskID: {} Train epoch: {} Train-PPL: {:.2f} step: {} speed: {} wps"
                            .format(FLAGS.task_index, current_epoch // 2,
                                    train_ppl, step, speed))
                    else:
                        # pre_adjustion is set: only accumulate the loss
                        # matrices during this pass.
                        run_options = tf.RunOptions(timeout_in_ms=300)
                        while current_epoch == epoch.eval():
                            try:
                                sess.run(loss_matrix_update_op,
                                         options=run_options)
                            except tf.errors.DeadlineExceededError:
                                if is_chief and coord.should_stop():
                                    coord.join(threads,
                                               stop_grace_period_secs=10)
                                    coord.clear_stop()
                                    sess.run(increment_epoch)
                                pass

                    current_epoch += 1
                    global_epoch += 1

                    if is_chief:
                        if not pre_adjustion.eval():
                            # Validation pass.
                            loss = 0.0
                            step = 0
                            threads = start_threads(valid_data, valid_model, 1)
                            while step < valid_step:
                                loss_val = sess.run(valid_model.loss)
                                loss += loss_val
                                step += 1
                            valid_ppl = np.exp(loss / step)
                            print("Valid epoch: {} Valid-PPL: {:.2f} step: {}".
                                  format(current_epoch // 2, valid_ppl, step))
                            valid_summary = tf.Summary(value=[
                                tf.Summary.Value(tag="valid_ppl",
                                                 simple_value=valid_ppl),
                            ])
                            summary_writer.add_summary(valid_summary,
                                                       global_epoch)
                            coord.join(threads, stop_grace_period_secs=10)
                            coord.clear_stop()

                            # Test pass.
                            step = 0
                            acc_list = []
                            threads = start_threads(test_data, test_model, 1)
                            while step < test_step:
                                step_acc = sess.run(test_model.accuracy)
                                acc_list.append(step_acc)
                                step += 1
                            test_acc = sum(acc_list) / len(acc_list)
                            print(
                                "Test epoch: {} Test-Accuracy: {:.4f} step: {}"
                                .format(current_epoch // 2, test_acc, step))
                            test_summary = tf.Summary(value=[
                                tf.Summary.Value(tag="test_acc",
                                                 simple_value=test_acc)
                            ])
                            summary_writer.add_summary(test_summary,
                                                       global_epoch)
                            coord.join(threads, stop_grace_period_secs=10)
                            coord.clear_stop()

                            # If valid data performs bad, decay learning rate
                            current_lr = train_model.lr.eval()
                            if not FLAGS.use_adam and current_lr > 0.005 and len(
                                    ppl_history
                            ) > 0 and valid_ppl > ppl_history[-1]:
                                current_lr *= FLAGS.lr_decay_factor
                                train_model.update_lr(sess, current_lr)

                            # If converged, do dictionary adjustion
                            if (current_epoch // 2) == 10:
                                #if current_epoch > 0 and current_epoch % 2 == 0:
                                #if(len(ppl_history) >= 30 and valid_ppl > max(ppl_history[-5:])):
                                #if len(ppl_history) >= 2 and global_valid_ppl.eval() > ppl_history[-1]:
                                sess.run(do_pre_adjustion)
                            ppl_history.append(valid_ppl)
                        else:
                            # Snapshot the accumulated losses before ending
                            # the round.
                            _loss_matrix_r = loss_matrix_r.eval()
                            _loss_matrix_c = loss_matrix_c.eval()
                            print("Saving model...")
                            # Save graph and model parameters
                            model_path = os.path.join(FLAGS.model_dir,
                                                      FLAGS.model_name)
                            model_saver.save(sess, model_path)
                            # Save wordid2r and wordid2c in reader
                            wordid2rc_path = os.path.join(
                                FLAGS.model_dir, "wordid2rc.pkl")
                            with open(wordid2rc_path, 'wb') as wordid2rc_file:
                                pickle.dump(reader.wordid2r, wordid2rc_file)
                                pickle.dump(reader.wordid2c, wordid2rc_file)
                            sess.run(do_adjustion)
                        sess.run(increment_epoch)
                    else:
                        # Non-chief workers busy-wait until the chief bumps
                        # the shared epoch counter.
                        while current_epoch == epoch.eval():
                            pass
                    current_epoch += 1

                if is_chief:
                    print("start adjusting...")
                    # Expand both accumulators to vocab_size x
                    # lightrnn_size**2 columns and add them.
                    _loss_matrix_r = np.repeat(_loss_matrix_r,
                                               FLAGS.lightrnn_size,
                                               axis=1)
                    _loss_matrix_c = np.tile(_loss_matrix_c,
                                             [1, FLAGS.lightrnn_size])
                    _loss_matrix = _loss_matrix_r + _loss_matrix_c
                    # Some words didn't appear in train set so their losses are 0
                    mean_loss = np.mean(_loss_matrix, axis=1, keepdims=True)
                    mean_loss[mean_loss == 0] = 1
                    # Normalise each row by its mean and scale to integers.
                    matrix = ((_loss_matrix / mean_loss) *
                              10000).astype(int).tolist()

                    # Use ortools to optimize the dictionary
                    assignment = pywrapgraph.LinearSumAssignment()
                    original_total_cost = 0
                    start_time = time.time()
                    for worker in range(FLAGS.vocab_size):
                        for task in range(FLAGS.vocab_size):
                            if worker == task:
                                # Diagonal sum, reported below as
                                # original_total_cost.
                                original_total_cost += matrix[worker][task]
                            assignment.AddArcWithCost(worker, task,
                                                      matrix[worker][task])
                    solve_status = assignment.Solve()
                    if solve_status == assignment.OPTIMAL:
                        # Invert the current (row, column) -> word mapping.
                        id2wordid = np.zeros(FLAGS.vocab_size, dtype=np.int32)
                        for i in range(FLAGS.vocab_size):
                            id2wordid[reader.wordid2r[i] * FLAGS.lightrnn_size
                                      + reader.wordid2c[i]] = i
                        total_adjustion = 0
                        for i in range(0, assignment.NumNodes()):
                            true_id = id2wordid[i]
                            # Decode the mate into a row/column pair.
                            reader.wordid2r[true_id] = assignment.RightMate(
                                i) // FLAGS.lightrnn_size
                            reader.wordid2c[true_id] = assignment.RightMate(
                                i) % FLAGS.lightrnn_size
                            if assignment.RightMate(i) != i:
                                total_adjustion += 1
                        print(
                            "takes %.2f seconds, original_total_cost is %.2f, total_loss is %.2f, total_adjustion is %d."
                            % (time.time() - start_time, original_total_cost,
                               assignment.OptimalCost(), total_adjustion))
                        # print('Worker %d assigned to task %d. Cost = %d' % (i, assignment.RightMate(i), assignment.AssignmentCost(i)))
                        for i in range(FLAGS.vocab_size):
                            reader.id2wordid[reader.wordid2r[i] *
                                             FLAGS.lightrnn_size +
                                             reader.wordid2c[i]] = i
                    elif solve_status == assignment.INFEASIBLE:
                        print('No assignment is possible.')
                    elif solve_status == assignment.POSSIBLE_OVERFLOW:
                        print(
                            'Some input costs are too large and may cause an integer overflow.'
                        )
# Split the waypoints in ``df_raw`` into equally sized batches of ``k``
# points: cluster the coordinates, replicate every cluster centre ``k``
# times and solve the resulting assignment problem.
k = 3200
n_eq = (len(df_raw) - (len(df_raw) % k)) / k
df = df_raw.head(len(df_raw) - (len(df_raw) % k))  # Take a divisible number
kmeans = KMeans(n_clusters=len(df) // k)
# Use the trimmed frame here: the number of columns below is
# (len(df) // k) * k, so taking coordinates from df_raw would give more
# rows than columns whenever len(df_raw) is not a multiple of k, and the
# assignment could not be solved.
coords = df[['Longitude', 'Latitude']].dropna().values
kmeans.fit(coords)
cluster_centers = kmeans.cluster_centers_
dist_mat = euclidean_distances(coords, cluster_centers)
cost = np.tile(dist_mat, (1, k))  # the repetition here is inefficient

wp_len = len(cost)
batch_len = len(cost[0])
assignment = pywrapgraph.LinearSumAssignment()
for wp in range(wp_len):
    for batch in range(batch_len):
        if cost[wp][batch]:
            assignment.AddArcWithCost(wp, batch, int(cost[wp][batch]))

solve_status = assignment.Solve()
batch_ids = np.zeros(wp_len)
if solve_status == assignment.OPTIMAL:
    for i in range(0, assignment.NumNodes()):
        # Columns repeat the cluster centres k times; fold the column
        # index back onto a 1-based cluster number.
        batch_ids[i] = assignment.RightMate(i) % (batch_len // k) + 1
print(batch_ids)