def prims(fname, testing=False):
    '''
    Run Prim's minimum spanning tree algorithm on the graph defined in fname.

    fname: file name handed to the Graph constructor
    testing: forwarded unchanged to the Graph constructor
    returns the total cost of the selected edges and the graph
    '''
    graph = Graph(fname, testing=testing)
    frontier = Heap(graph.num_verticies)
    # One flag per vertex id; the list has num_verticies + 1 slots so that
    # vertex ids can be used directly as indices.
    visited = [False] * (graph.num_verticies + 1)

    # Any start vertex works; the original note says the provided graphs all
    # start at node 1, so that one is used.
    start = 1
    visited[start] = True
    frontier = add_edge_from_vertex(start, frontier, graph, visited)

    # Repeatedly take the cheapest entry off the heap, mark its vertex as
    # reached, and let add_edge_from_vertex refresh the heap with the edges
    # leaving that vertex.
    mst_cost = 0
    while len(frontier) > 0:
        vertex, cost = frontier.extract_min()
        visited[vertex] = True
        mst_cost += cost
        frontier = add_edge_from_vertex(vertex, frontier, graph, visited)

    return mst_cost, graph
def q2_heap(weights, lengths):
    '''
    Build and return the heap for question 2.

    Each job (its 0-based position in weights/lengths) is inserted with the
    key -(weight / length); the ratio is negated because the heap extracts
    the minimum while the largest ratio is wanted first.
    '''
    heap = Heap(len(weights))
    # Lazy generator: each ratio is computed right before its insert.
    ratios = (weight / length for weight, length in zip(weights, lengths))
    for job, ratio in enumerate(ratios):
        heap.insert(job, -ratio)
    return heap
def q1_heap(weights, lengths):
    '''
    Build and return the heap for question 1.

    Each job (its 0-based position in weights/lengths) is inserted with the
    key -(weight - length); the difference is negated because the heap
    extracts the minimum while the largest difference is wanted first.
    '''
    heap = Heap(len(weights))
    for job, pair in enumerate(zip(weights, lengths)):
        weight, length = pair
        difference = weight - length
        heap.insert(job, -difference)
    return heap
def Dijkstra(self, s):
    '''
    Run Dijkstra's single source shortest path starting at s.

    s: source vertex number
    Returns a list indexed by vertex number holding the distance from s to
    that vertex. The source itself has distance 0. Any vertex that is never
    reached keeps the value self.longest_path.
    '''

    def add_to_heap(h, tail, explored, added_to_heap_by):
        '''
        Offer every unexplored vertex reachable by one edge from tail to the heap.

        h: Heap instance
        tail: vertex number whose outgoing edges are scanned
        explored: list of bools, True once a vertex's distance is final
        added_to_heap_by: list of ints, the tail that last put each vertex
            on the heap (-1 means the vertex has never been on the heap)
        returns added_to_heap_by
        '''
        for edge in self.vertex_edges_tail[tail]:
            head = self.edge_verticies[edge][1]
            if explored[head]:
                continue
            new_length = shortest_path[tail] + self.edge_length[edge]
            if added_to_heap_by[head] >= 0:
                # head is already on the heap: pull it off, keep whichever
                # length is shorter, and re-insert it below.
                previous_length = h.delete(head)
                if new_length > previous_length:
                    new_length = previous_length
                else:
                    added_to_heap_by[head] = tail
            else:
                added_to_heap_by[head] = tail
            h.insert(head, new_length)
        return added_to_heap_by

    vertex_slots = self.num_verticies + 1
    shortest_path = [self.longest_path] * vertex_slots
    explored = [False] * vertex_slots
    added_to_heap_by = [-1] * vertex_slots

    # The source is at distance 0 from itself. Without this, the first
    # relaxation would start from self.longest_path and every reported
    # distance would be offset by that amount.
    shortest_path[s] = 0
    explored[s] = True

    h = Heap(self.num_verticies)
    add_to_heap(h, s, explored, added_to_heap_by)
    while len(h) > 0:
        # The minimum entry on the heap is final: record it and relax its edges.
        w, length = h.extract_min()
        shortest_path[w] = length
        explored[w] = True
        added_to_heap_by = add_to_heap(h, w, explored, added_to_heap_by)
    return shortest_path
def get_sum_weighted_times(h, weights, lengths, elapsed_time=0):
    '''
    Drain the heap h and accumulate the sum of weighted completion times.

    Jobs are scheduled in the order the heap yields them. When several
    consecutive entries share the same cost, they are scheduled by highest
    weight first.

    h: heap of (job, cost) entries; it is emptied by this call
    weights: weight of each job, indexed by job
    lengths: length of each job, indexed by job
    elapsed_time: time already elapsed before the first job starts
    returns elapsed_time, weighted_times
    '''
    total = 0
    while len(h) > 0:
        job, cost = h.extract_min()
        # NOTE(review): view_min is also called right after the last entry
        # was extracted, so it presumably tolerates an empty heap - confirm
        # against the Heap implementation.
        _, upcoming_cost = h.view_min()

        if cost != upcoming_cost:
            # No tie: schedule this job straight away.
            elapsed_time += lengths[job]
            total += elapsed_time * weights[job]
            continue

        # Tie on cost: gather every job sharing this cost into a second heap
        # keyed by negated weight, so the heaviest job comes out first.
        # Per the original author's note an incorrect order is not possible
        # here: with c = w - l or c = w / l, equal cost and equal weight
        # imply equal length.
        tied = Heap(h.max_heap_size)
        tied.insert(job, -weights[job])
        while cost == upcoming_cost:
            job, cost = h.extract_min()
            tied.insert(job, -weights[job])
            _, upcoming_cost = h.view_min()

        while len(tied) > 0:
            tied_job, _ = tied.extract_min()
            elapsed_time += lengths[tied_job]
            total += elapsed_time * weights[tied_job]

    return elapsed_time, total
def generate_code(self):
    '''
    Build the Huffman tree for the dataset.

    Repeatedly merges the two lightest nodes on the heap into a new node
    until only two nodes are left. The new node's weight is appended to
    self.weights, and the parent of every merged node is recorded in
    self.parents. A node that is never merged keeps itself as parent.
    '''
    # Capacity is far larger than needed (acknowledged over-allocation).
    self.heap = Heap(self.num_symbols ** 2)
    for node, weight in enumerate(self.weights):
        self.heap.insert(node, weight)

    while len(self.heap) > 2:
        first, _ = self.heap.extract_min()
        second, _ = self.heap.extract_min()
        merged_weight = self.weights[first] + self.weights[second]

        # The new node takes the next free 0-based index and starts out as
        # its own parent.
        merged = len(self.parents)
        self.weights.append(merged_weight)
        self.parents.append(merged)
        self.parents[first] = self.parents[second] = merged
        self.heap.insert(merged, merged_weight)
class Huffman:
    '''
    Basic implementation to generate Huffman codes.

    The weight for each node is stored in the list self.weights, 0 indexed
    by node id. The 'parent' of each node in the binary tree form of the code
    is stored in self.parents, also 0 indexed by node id. A node that is its
    own parent has not been merged into a larger subtree.
    '''

    def __init__(self, fname, testing=False):
        '''
        Read in the input and optional solution data (if testing).

        fname: input file; the first non-blank line is the number of
            symbols, every following non-blank line is one symbol weight
        testing: when True, also read the expected answers from the file
            whose name is fname with "input" replaced by "output"
        '''
        self.fname = fname
        self.testing = testing
        with open(fname, 'r') as f:
            # Skip blank lines so a trailing empty line does not break int().
            data = [line for line in f if line.strip()]
        self.num_symbols = int(data[0])
        self.weights = [int(x) for x in data[1:]]
        # Every symbol starts out as its own parent.
        self.parents = list(range(self.num_symbols))
        if testing:
            fname = fname.replace("input", "output")
            with open(fname, 'r') as f:
                data = f.readlines()
            self.solution_1 = int(data[0])
            self.solution_2 = int(data[1])

    def generate_code(self):
        '''
        Generate the Huffman code for the dataset.

        Repeatedly merges the two lightest nodes into a new node until only
        two nodes are left on the heap. Appends the new node's weight to
        self.weights and stores the parent of each node in self.parents.
        The nodes still on the heap at the end keep themselves as parent;
        get_codeword_lengths_range counts such a node as length 1.
        '''
        self.heap = Heap(self.num_symbols**2)  # way overkill and excessive allocation
        for idx, weight in enumerate(self.weights):
            self.heap.insert(idx, weight)
        while len(self.heap) > 2:
            p, _ = self.heap.extract_min()
            q, _ = self.heap.extract_min()
            w = self.weights[p] + self.weights[q]
            # add the new node to tracking
            parent_idx = len(self.parents)  # note 0 based indexing here
            self.weights.append(w)
            self.parents.append(parent_idx)
            self.parents[p] = parent_idx
            self.parents[q] = parent_idx
            self.heap.insert(parent_idx, w)

    def get_codeword_lengths_range(self):
        '''
        Return a tuple of the min and max lengths of the codewords.

        Also (re)builds self.codeword_lengths, indexed by node id: a node
        that is its own parent has length 1, every other visited node has its
        parent's length plus one. Nodes not on any symbol's path stay at -1.
        '''
        parents = self.parents
        lengths = [-1] * len(parents)
        self.codeword_lengths = lengths

        def resolve(idx):
            ''' Length of node idx, found by walking up iteratively
            (no recursion, so very deep trees cannot hit the recursion limit) '''
            path = []
            node = idx
            # Climb until a node with a known length or a self-parented node.
            while lengths[node] < 0 and parents[node] != node:
                path.append(node)
                node = parents[node]
            if lengths[node] < 0:
                lengths[node] = 1
            depth = lengths[node]
            # Walk back down, caching the length of every node on the path.
            for child in reversed(path):
                depth += 1
                lengths[child] = depth
            return lengths[idx]

        min_len = self.num_symbols
        max_len = 0
        for idx in range(self.num_symbols):
            length = resolve(idx)
            min_len = min(min_len, length)
            max_len = max(max_len, length)
        return (min_len, max_len)