def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph using Spark MapReduce.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0  # current BFS depth; read by the map/reduce helpers
    sc = SparkContext(master, "python")
    # Seed the RDD with the hashed solution board at level 0.
    solution = Sliding.solution(WIDTH, HEIGHT)
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, solution)
    data = sc.parallelize([(sol, level), ])
    counter = 0
    curLen = 1
    # Expand one level per iteration; stop when the RDD size stops growing.
    while (counter < curLen):
        level += 1
        data = data.flatMap(bfs_flat_map)
        if (level % 12 == 0):
            # Repartition occasionally to keep keys co-located after flatMap.
            data = data.partitionBy(PARTITION_COUNT)
        data = data.reduceByKey(bfs_reduce)
        if (level % 6 == 0):
            # count() forces evaluation, so only sample the size every 6 levels.
            counter = curLen
            curLen = data.count()
    data.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; writes (level, hash) pairs sorted by level.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol_board = Sliding.solution(WIDTH, HEIGHT)
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, sol_board)
    all_sols = sc.parallelize([(sol, level)])  # create an RDD seeded with the solved board
    before_count = 1
    k = 0  # counter for iterations of partitionBy
    c = 0  # counter for iterations of count()
    while True:
        level += 1
        all_sols = all_sols.flatMap(bfs_map)
        if k % 4 == 0:  # every 4 iterations, use partitionBy to re-localize keys
            all_sols = all_sols.partitionBy(PARTITION_COUNT)
        all_sols = all_sols.reduceByKey(bfs_reduce)
        if c % 2 == 0:  # every 2 iterations, use count() (it forces evaluation, so it is costly)
            after_count = all_sols.count()
            if before_count == after_count:
                # No new states discovered: the whole graph has been explored.
                break
            before_count = after_count
        k += 1
        c += 1
    # Swap to (level, hash) and sort so output is ordered by BFS depth.
    all_sols = all_sols.map(lambda a: (a[1], a[0])).sortByKey()
    all_sols.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):
    """Expand one (board_hash, level) pair into itself plus its children.

    value: tuple (board_hash, level) where board_hash is the int key produced
        by Sliding.board_to_hash and level is the BFS depth it was found at.
    Returns a list containing the original pair plus one (child_hash, level+1)
    pair per child position.
    """
    return_list = [value]
    # BUG FIX: every sibling variant in this file calls
    # Sliding.children(WIDTH, HEIGHT, ...); this one passed (HEIGHT, WIDTH),
    # which transposes the board dimensions for non-square puzzles.
    children = Sliding.children(WIDTH, HEIGHT,
                                Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))
    for child in children:
        return_list.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), value[1] + 1))
    return return_list
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph using Spark MapReduce.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0  # current BFS depth; read by the map/reduce helpers
    sc = SparkContext(master, "python")
    # Seed with the hashed solution board at level 0.
    solution = Sliding.solution(WIDTH, HEIGHT)
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, solution)
    data = sc.parallelize([(sol, level), ])
    counter = 0
    curLen = 1
    # Stop when the number of discovered states stops increasing.
    while (counter < curLen):
        level += 1
        data = data.flatMap(bfs_flat_map)
        if (level % 12 == 0):
            # Periodic repartition keeps equal keys on the same partition.
            data = data.partitionBy(PARTITION_COUNT)
        data = data.reduceByKey(bfs_reduce)
        if (level % 6 == 0):
            # count() forces evaluation; only sample every 6 levels.
            counter = curLen
            curLen = data.count()
    data.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; applies swap_map before writing.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    RDD = sc.parallelize([(sol, level)])
    count = RDD.count()
    RDD_count = 0
    search = True
    k = 1
    while search:
        if k % 3 == 0:
            # Every 3rd iteration also repartition (PARTITION_COUNT is 16).
            RDD = RDD.flatMap(bfs_map).partitionBy(PARTITION_COUNT).reduceByKey(bfs_reduce)
        else:
            RDD = RDD.flatMap(bfs_map).reduceByKey(bfs_reduce)
        if k % 2 == 0:
            # count() forces evaluation; only check for convergence every 2nd pass.
            RDD_count = RDD.count()
            if RDD_count == count:
                search = False
            count = RDD_count
        k = k + 1
        level = level + 1
    # swap_map presumably flips pairs to (level, hash) for output — defined elsewhere.
    RDD = RDD.map(swap_map)
    RDD.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph, unioning new frontiers into one RDD.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, sol)
    new_visited = [(sol, level)]
    new_visited = sc.parallelize(new_visited)
    num = 1
    # while there are still (k, v) pairs at the current level
    while num:
        # use += (RDD union) as the mapping function does not retain boards
        # that are not at the current global level
        new_visited += new_visited.flatMap(bfs_map)
        if level % 4 == 3:
            # only reduce and filter every 4th iteration for performance reasons
            new_visited = new_visited.reduceByKey(bfs_reduce)
            new_visited = new_visited.partitionBy(PARTITION_COUNT)
            # count the number of elements at the current level; 0 ends the loop
            num = new_visited.filter(filter_func).count()
        level += 1
        # Debugging output only.
        print("\n\n\nLevel " + str(level) + '\n\n\n')
    new_visited.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph, unioning new frontiers into one RDD.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, sol)
    new_visited = [(sol, level)]
    new_visited = sc.parallelize(new_visited)
    num = 1
    # while there are still (k, v) pairs at the current level
    while num:
        # use += (RDD union) as the mapping function does not retain boards
        # that are not at the current global level
        new_visited += new_visited.flatMap(bfs_map)
        if level % 4 == 3:
            # only reduce and filter every 4th iteration for performance reasons
            new_visited = new_visited.reduceByKey(bfs_reduce)
            new_visited = new_visited.partitionBy(PARTITION_COUNT)
            # count elements at the current level; 0 terminates the loop
            num = new_visited.filter(filter_func).count()
        level += 1
        # Debugging output only.
        print("\n\n\nLevel " + str(level) + '\n\n\n')
    new_visited.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; stops when the RDD size converges.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    # NOTE(review): the global declaration and initialization below duplicate
    # the ones above (Python 2 emits a SyntaxWarning for global-after-assign).
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0  # this "constant" changes per level but is fixed within one MapReduce job
    # The solution configuration: BFS explores the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)
    soln = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)])
    num = 1
    temp = 0
    while (temp != num):
        if (level % 16 == 0):
            # Periodic repartition with the builtin hash as partition function.
            soln = soln.partitionBy(PARTITION_COUNT, hash)
        level = level + 1
        soln = soln.flatMap(bfs_map).reduceByKey(bfs_reduce)
        temp = num
        num = soln.count()  # forces evaluation each level
    soln.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):
    """Pass a state through, expanding it when it sits on the previous frontier.

    value: (board_hash, depth) pair. States at depth == level - 1 additionally
    emit each child as (child_hash, level); every state re-emits itself.
    """
    board_hash, depth = value
    expanded = []
    if depth == level - 1:
        board = Sliding.hash_to_board(WIDTH, HEIGHT, board_hash)
        expanded = [
            (Sliding.board_to_hash(WIDTH, HEIGHT, child), level)
            for child in Sliding.children(WIDTH, HEIGHT, board)
        ]
    expanded.append(value)
    return expanded
def bfs_map(value):
    """Emit a (hash, level) state, plus its children when it is on the frontier.

    value: tuple of (puzzle hash, level). Non-frontier states pass through alone.
    """
    board_hash, depth = value
    out = [value]
    if depth != level:
        return out
    board = Sliding.hash_to_board(WIDTH, HEIGHT, board_hash)
    for child in Sliding.children(WIDTH, HEIGHT, board):
        out.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1))
    return out
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph keyed by board hashes.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)
    hashID = Sliding.board_to_hash(WIDTH, HEIGHT, sol)  # board (obj) -> hash (int) key
    rdd = sc.parallelize([(hashID, level)])
    prev_count = 0
    count = rdd.count()
    k = 0  # iterations since last partitionBy
    i = 0  # iterations since last count()
    while prev_count < count:
        rdd = rdd.flatMap(bfs_map)
        if k % 4 == 0:
            # Repartition every 4th pass, hashing the integer keys.
            rdd = rdd.partitionBy(16, hash)
        rdd = rdd.reduceByKey(bfs_reduce)
        level += 1
        if i % 4 == 0:
            # Only force evaluation with count() every 4th pass.
            prev_count = count
            count = rdd.count()
        k += 1
        i += 1
    # NOTE(review): boardState is computed but never used.
    boardState = Sliding.board_to_hash(WIDTH, HEIGHT, sol)
    rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):
    """Return [value], plus hashed children at `level` when value was on the
    previous frontier (depth == level - 1)."""
    if value[1] == level - 1:
        board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        return [value] + [
            (Sliding.board_to_hash(WIDTH, HEIGHT, child), level)
            for child in Sliding.children(WIDTH, HEIGHT, board)
        ]
    return [value]
def bfs_map(value):
    """Re-emit a (hash, level) state; frontier states also emit their children
    at level + 1, keyed by hash."""
    state_hash, depth = value
    emitted = [(state_hash, depth)]
    if depth == level:
        board = Sliding.hash_to_board(WIDTH, HEIGHT, state_hash)
        emitted.extend(
            (Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)
            for child in Sliding.children(WIDTH, HEIGHT, board)
        )
    return emitted
def bfs_map(value):
    """Keep states strictly below the current level; states at level - 1 also
    emit their children one level deeper. States at or above `level` are dropped."""
    state_hash, depth = value
    emitted = []
    if depth < level:
        emitted.append((state_hash, depth))
    if depth == level - 1:
        board = Sliding.hash_to_board(WIDTH, HEIGHT, state_hash)
        emitted.extend(
            (Sliding.board_to_hash(WIDTH, HEIGHT, child), depth + 1)
            for child in Sliding.children(WIDTH, HEIGHT, board)
        )
    return emitted
def bfs_map(value):
    """Re-emit a state; when it lies on the previous frontier (level - 1),
    also emit each child hashed at the current level."""
    out = []
    if value[1] == level - 1:
        board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        for child in Sliding.children(WIDTH, HEIGHT, board):
            out.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), level))
    out.append(value)
    return out
def bfs_map(value):
    """Emit the (puzzle hash, level) pair itself; frontier states additionally
    emit their children at level + 1."""
    if value[1] != level:
        return [value]
    board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
    return [value] + [
        (Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)
        for child in Sliding.children(WIDTH, HEIGHT, board)
    ]
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph using raw boards as RDD keys.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers (NOTE: hard-coded to 6 below).
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)
    # NOTE(review): unlike sibling variants, this one parallelizes the raw
    # board object rather than its hash — confirm bfs_map expects that.
    rdd = sc.parallelize([(sol, level)])
    prev_count = 0
    count = rdd.count()
    k = 0  # iterations since last partitionBy
    i = 0  # iterations since last count()
    while prev_count < count:
        rdd = rdd.flatMap(bfs_map)
        if k % 4 == 0:
            # partitionHash is a custom partition function defined elsewhere.
            rdd = rdd.partitionBy(16, partitionHash)
        rdd = rdd.reduceByKey(bfs_reduce)
        level += 1
        if i % 4 == 0:
            # Only force evaluation with count() every 4th pass.
            prev_count = count
            count = rdd.count()
        k += 1
        i += 1
    hashID = Sliding.board_to_hash(WIDTH, HEIGHT, sol)  # board (obj) -> hash (int); unused afterwards
    # NOTE(review): this clobbers the `slaves` parameter — verify intentional.
    slaves = 6
    rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_sliding_puzzle(master, output, height, width): """ Solves a sliding puzzle of the provided height and width. master: specifies master url for the spark context output: function that accepts string to write to the output file height: height of puzzle width: width of puzzle """ # Set up the spark context. Use this to create your RDD sc = SparkContext(master, "python") # Global constants that will be shared across all map and reduce instances. # You can also reference these in any helper functions you write. global HEIGHT, WIDTH, level, prev_len, PARTITION_COUNT # Initialize global constants HEIGHT = height WIDTH = width level = 0 # this "constant" will change, but it remains constant for every MapReduce job # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node sol = Sliding.solution(WIDTH, HEIGHT) level_nodes = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)]) PARTITION_COUNT = 16 prev_len = 0 count = 0 while True: level_nodes = level_nodes.flatMap(bfs_map).reduceByKey(bfs_reduce) next_len = level_nodes.count() if next_len == prev_len: break prev_len = next_len count += 1 if count == 10: count = 0 level_nodes = level_nodes.partitionBy(PARTITION_COUNT) """ YOUR MAP REDUCE PROCESSING CODE HERE """ # level = [] # def add_to_string(obj): # output(str(obj)) level_nodes = level_nodes.map(lambda x : (x[1], x[0])) output_string = "" for l in level_nodes.sortByKey(True).collect(): output_string += str(l) + "\n" output(output_string) # level_nodes.sortByKey(True).coalesce(1).saveAsTextFile("output") # level_nodes.foreach(add_to_string) """ YOUR OUTPUT CODE HERE """ sc.stop()
def bfs_flat_map(value):
    """Re-emit a (hash, level) state; states on the previous frontier also
    emit their children hashed at the current level.

    value: tuple (board_hash, depth) — keys are hashed ints, not boards.
    Returns a list of (hash, level) pairs.
    """
    re = []
    value = (value[0], value[1])
    re.append(value)
    if value[1] == (level - 1):  # check if it is on the previous level
        # BUG FIX: value[0] is a hashed int, but Sliding.children expects a
        # board object — convert with hash_to_board first (as the other
        # bfs_flat_map variant in this file does).
        children = Sliding.children(WIDTH, HEIGHT,
                                    Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))
        for each in children:
            # store the corresponding hashed ints as keys instead of boards
            each = Sliding.board_to_hash(WIDTH, HEIGHT, each)
            re.append(tuple((each, level)))
    return re
def bfs_map(arg):
    """Pass a (hash, level) state through; frontier states (depth == level)
    additionally emit each child hashed at level + 1."""
    depth = arg[1]
    if depth != level:
        return [arg]
    board = Sliding.hash_to_board(WIDTH, HEIGHT, arg[0])
    results = [arg]
    results.extend(
        (Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)
        for child in Sliding.children(WIDTH, HEIGHT, board)
    )
    return results
def bfs_map(value):
    """Return [value]; when value sits on the previous frontier (level - 1),
    append every child hashed at the current level."""
    if value[1] != level - 1:
        return [value]
    board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
    out = [value]
    for child in Sliding.children(WIDTH, HEIGHT, board):
        out.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), level))
    return out
def bfs_flat_map(value):
    """Re-emit a (hash, level) state; states on the previous frontier also
    emit their children hashed at the current level.

    value: tuple (board_hash, depth) — keys are hashed ints, not boards.
    Returns a list of (hash, level) pairs.
    """
    re = []
    value = (value[0], value[1])
    re.append(value)
    if value[1] == (level - 1):  # check if it is on the previous level
        # BUG FIX: value[0] is a hashed int, but Sliding.children expects a
        # board object — convert with hash_to_board first (as the other
        # bfs_flat_map variant in this file does).
        children = Sliding.children(WIDTH, HEIGHT,
                                    Sliding.hash_to_board(WIDTH, HEIGHT, value[0]))
        for each in children:
            # store the corresponding hashed ints as keys instead of boards
            each = Sliding.board_to_hash(WIDTH, HEIGHT, each)
            re.append(tuple((each, level)))
    return re
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph keyed by board hashes.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)
    hashID = Sliding.board_to_hash(WIDTH, HEIGHT, sol)  # board (obj) -> hash (int) key
    rdd = sc.parallelize([(hashID, level)])
    prev_count = 0
    count = rdd.count()
    k = 0  # iterations since last partitionBy
    i = 0  # iterations since last count()
    while prev_count < count:
        rdd = rdd.flatMap(bfs_map)
        if k % 4 == 0:
            # Repartition every 4th pass, hashing the integer keys.
            rdd = rdd.partitionBy(16, hash)
        rdd = rdd.reduceByKey(bfs_reduce)
        level += 1
        if i % 4 == 0:
            # Only force evaluation with count() every 4th pass.
            prev_count = count
            count = rdd.count()
        k += 1
        i += 1
    # NOTE(review): boardState is computed but never used.
    boardState = Sliding.board_to_hash(WIDTH, HEIGHT, sol)
    rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph using raw boards as RDD keys.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers (NOTE: hard-coded to 6 below).
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)
    # NOTE(review): unlike sibling variants, this one parallelizes the raw
    # board object rather than its hash — confirm bfs_map expects that.
    rdd = sc.parallelize([(sol, level)])
    prev_count = 0
    count = rdd.count()
    k = 0  # iterations since last partitionBy
    i = 0  # iterations since last count()
    while prev_count < count:
        rdd = rdd.flatMap(bfs_map)
        if k % 4 == 0:
            # partitionHash is a custom partition function defined elsewhere.
            rdd = rdd.partitionBy(16, partitionHash)
        rdd = rdd.reduceByKey(bfs_reduce)
        level += 1
        if i % 4 == 0:
            # Only force evaluation with count() every 4th pass.
            prev_count = count
            count = rdd.count()
        k += 1
        i += 1
    hashID = Sliding.board_to_hash(WIDTH, HEIGHT, sol)  # board (obj) -> hash (int); unused afterwards
    # NOTE(review): this clobbers the `slaves` parameter — verify intentional.
    slaves = 6
    rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS with separate frontier and visited-set RDDs over the puzzle graph.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used for partitioning and coalesce.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.solution(WIDTH, HEIGHT)
    frontierRDD = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)])
    boardsRDD = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)])
    while True:
        level += 1
        # Expand frontier to a flattened list of ONLY child boards (keys, no values).
        frontierRDD = frontierRDD.flatMap(lambda v: Sliding.children(
            WIDTH, HEIGHT, Sliding.hash_to_board(WIDTH, HEIGHT, v[0])))
        # Union new (child_hash, level) pairs into the visited set.
        boardsRDD = boardsRDD + frontierRDD.map(
            lambda v: (Sliding.board_to_hash(WIDTH, HEIGHT, v), level))
        # Only keep each board at the lowest level it was ever seen.
        boardsRDD = boardsRDD.reduceByKey(lambda v1, v2: min(v1, v2))
        # The new frontier is exactly the boards first seen at the current level.
        frontierRDD = boardsRDD.filter(lambda v: v[1] == level)
        # NOTE(review): identity partition function on int keys; the original
        # author noted it "doesn't work without" these repartitions — verify.
        boardsRDD = boardsRDD.partitionBy(slaves, lambda v: v)
        frontierRDD = frontierRDD.partitionBy(slaves, lambda v: v)
        # Check for an empty frontier (termination) only every 4th level.
        if level % 4 == 0 and frontierRDD.count() == 0:
            break
    boardsRDD.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves): """ Solves a sliding puzzle of the provided height and width. master: specifies master url for the spark context output: function that accepts string to write to the output file height: height of puzzle width: width of puzzle """ # Set up the spark context. Use this to create your RDD sc = SparkContext(master, "python") # Global constants that will be shared across all map and reduce instances. # You can also reference these in any helper functions you write. global HEIGHT, WIDTH, level # Initialize global constants HEIGHT = height WIDTH = width level = 0 # this "constant" will change, but it remains constant for every MapReduce job #THIS MEANS THAT MAPREDUCE FROM LEVEL TO NEXT LEVEL # The solution configuration for this sliding puzzle. You will begin exploring the tree from this node sol = Sliding.solution(WIDTH, HEIGHT) """ YOUR MAP REDUCE PROCESSING CODE HERE """ rdd = [(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)] prevcount = 0 c = 1 rdd = sc.parallelize(rdd) k = 0 j = 0 while c != prevcount: if k == 16: rdd = rdd.partitionBy(PARTITION_COUNT, hash) k = 0 rdd = rdd.flatMap(bfs_map) \ .reduceByKey(bfs_reduce, numPartitions=16) if j == 8: prevcount = c c = rdd.count() j = 0 j += 1 level += 1 k += 1 #finalsolution = rdd.collect() #sc.stop() rdd.coalesce(slaves).saveAsTextFile(output)
def solve_puzzle(master, output, height, width, slaves):
    """Two-phase BFS: run to a computed depth lower bound without counting,
    then continue until the RDD size converges.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: worker count (NOTE: coalesce below is hard-coded to 6).
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    RDD = sc.parallelize([(sol, level)])
    counter = RDD.count()
    k, comp, data = 0, 0, 0  # NOTE(review): `data` is never used
    repar = 0
    # Heuristic lower bound on BFS depth: (sqrt(n)-1) * log2(n!) for n cells,
    # below which no convergence check (count) is needed.
    bound = (math.sqrt(WIDTH * HEIGHT) - 1) * math.log(math.factorial(WIDTH * HEIGHT), 2)
    # Phase 1: mapreduce without convergence checks while under the bound.
    while k <= bound:
        RDD = RDD.flatMap(bfs_map)
        if repar % 8 == 0:
            RDD = RDD.partitionBy(PARTITION_COUNT, hash)
        RDD = RDD.reduceByKey(bfs_reduce)
        level += 1
        k += 1
        repar += 1
    k = 0
    repar = 0
    # Phase 2: continue until the number of elements stops increasing.
    while True:
        RDD = RDD.flatMap(bfs_map)
        if repar % 8 == 0:
            RDD = RDD.partitionBy(PARTITION_COUNT, hash)
        RDD = RDD.reduceByKey(bfs_reduce)
        if k % 3 == 0:
            comp = RDD.count()
            if comp == counter:
                break
            else:
                counter = comp
        level += 1
        k += 1
        repar += 1
    # revert_back presumably reshapes pairs for output — defined elsewhere.
    RDD = RDD.map(revert_back)
    RDD.coalesce(6).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):
    """
    Takes a (board-hash, level) pair and, when the pair sits on the current
    global level, returns every child as (child-hash, level + 1).

    Non-frontier pairs yield an empty list: this variant does NOT re-emit the
    input itself (the driver retains old states by unioning RDDs).
    """
    state_hash, depth = value
    if depth != level:
        return []
    board = Sliding.hash_to_board(WIDTH, HEIGHT, state_hash)
    # Spark's flatMap expects a list of emitted pairs (unlike Hadoop's emit).
    return [
        (Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)
        for child in Sliding.children(WIDTH, HEIGHT, board)
    ]
def bfs_map(value):
    """Re-emit a (hash, level) state; frontier states (depth == level) also
    emit their children at level + 1, keyed by hash.

    value: tuple (board_hash, depth) where board_hash is an int key.
    Returns a list of (hash, level) pairs.
    """
    prev = [(value[0], value[1])]
    if value[1] == level:
        # BUG FIX: value[0] is already a hash — the original re-hashed it with
        # board_to_hash before converting, producing a wrong board.
        currBoard = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        children = Sliding.children(WIDTH, HEIGHT, currBoard)
        # BUG FIX: children are board objects; hash them so every RDD key is
        # an int, matching the keys reduceByKey compares against.
        curr = [(Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)
                for child in children]
        return prev + curr
    return prev
def solve_puzzle(master, output, height, width, slaves):
    """Two-phase BFS: run to a computed depth lower bound without counting,
    then continue until the RDD size converges.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: worker count (NOTE: coalesce below is hard-coded to 6).
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    RDD = sc.parallelize([(sol, level)])
    counter = RDD.count()
    k, comp, data = 0, 0, 0  # NOTE(review): `data` is never used
    repar = 0
    # Heuristic lower bound on BFS depth: (sqrt(n)-1) * log2(n!) for n cells.
    bound = (math.sqrt(WIDTH * HEIGHT) - 1) * math.log(
        math.factorial(WIDTH * HEIGHT), 2)
    # Phase 1: mapreduce without convergence checks while under the bound.
    while k <= bound:
        RDD = RDD.flatMap(bfs_map)
        if repar % 8 == 0:
            RDD = RDD.partitionBy(PARTITION_COUNT, hash)
        RDD = RDD.reduceByKey(bfs_reduce)
        level += 1
        k += 1
        repar += 1
    k = 0
    repar = 0
    # Phase 2: continue until the number of elements stops increasing.
    while True:
        RDD = RDD.flatMap(bfs_map)
        if repar % 8 == 0:
            RDD = RDD.partitionBy(PARTITION_COUNT, hash)
        RDD = RDD.reduceByKey(bfs_reduce)
        if k % 3 == 0:
            comp = RDD.count()
            if comp == counter:
                break
            else:
                counter = comp
        level += 1
        k += 1
        repar += 1
    # revert_back presumably reshapes pairs for output — defined elsewhere.
    RDD = RDD.map(revert_back)
    RDD.coalesce(6).saveAsTextFile(output)
    sc.stop()
def bfs_map(value):
    """
    Takes a (board-hash, level) pair and, when it is on the current global
    level, returns its children as (child-hash, level + 1) pairs.

    Non-frontier pairs yield an empty list — the input is NOT re-emitted
    (the driver keeps old states by unioning RDDs).
    """
    state_hash, depth = value
    if depth != level:
        return []
    board = Sliding.hash_to_board(WIDTH, HEIGHT, state_hash)
    # flatMap wants a list back (Spark has no emit() like Hadoop).
    return [
        (Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)
        for child in Sliding.children(WIDTH, HEIGHT, board)
    ]
def bfs_flat_map(value):
    """Re-emit a (hash, level) state; states on the previous frontier
    (depth == level - 1) also emit their children hashed one level deeper."""
    state_hash, depth = value
    emitted = [(state_hash, depth)]
    if depth == level - 1:
        board = Sliding.hash_to_board(WIDTH, HEIGHT, state_hash)
        emitted.extend(
            (Sliding.board_to_hash(WIDTH, HEIGHT, child), depth + 1)
            for child in Sliding.children(WIDTH, HEIGHT, board)
        )
    return emitted
def bfs_map(value):
    """
    Expand one RDD element at the last BFS level.

    Elements below the current level are returned alone in a list; elements at
    the last level are returned together with their children, each child keyed
    by its hash at depth + 1.
    """
    emitted = [value]
    board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
    depth = value[1]
    if depth < level:
        return emitted
    for child in Sliding.children(WIDTH, HEIGHT, board):
        child_key = Sliding.board_to_hash(WIDTH, HEIGHT, tuple(child))
        emitted.append((child_key, depth + 1))
    return emitted
def bfs_map(value):
    """
    Expand one RDD element at the last BFS level.

    Elements below the current level pass through alone; elements at the last
    level additionally emit each child as (child_hash, depth + 1).
    """
    out = [value]
    depth = value[1]
    if depth < level:
        return out
    board = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
    out.extend(
        (Sliding.board_to_hash(WIDTH, HEIGHT, tuple(child)), depth + 1)
        for child in Sliding.children(WIDTH, HEIGHT, board)
    )
    return out
def bfs_flat_map(value):
    """Re-emit a (hash, level) state; states on the previous frontier
    (depth == level - 1) also emit their children hashed one level deeper."""
    state_hash, depth = value
    out = [(state_hash, depth)]
    if depth == level - 1:
        board = Sliding.hash_to_board(WIDTH, HEIGHT, state_hash)
        for child in Sliding.children(WIDTH, HEIGHT, board):
            out.append((Sliding.board_to_hash(WIDTH, HEIGHT, child), depth + 1))
    return out
def bfs_map(value):
    """Re-emit a (hash, level) state; frontier states (depth == level) also
    emit their children at level + 1, keyed by hash.

    value: tuple (board_hash, depth) where board_hash is an int key.
    Returns a list of (hash, level) pairs.
    """
    prev = [(value[0], value[1])]
    if value[1] == level:
        # BUG FIX: value[0] is already a hash — the original re-hashed it with
        # board_to_hash before converting, producing a wrong board.
        currBoard = Sliding.hash_to_board(WIDTH, HEIGHT, value[0])
        children = Sliding.children(WIDTH, HEIGHT, currBoard)
        # BUG FIX: children are board objects; hash them so every RDD key is
        # an int, matching the keys reduceByKey compares against.
        curr = [(Sliding.board_to_hash(WIDTH, HEIGHT, child), level + 1)
                for child in children]
        return prev + curr
    return prev
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; counts every level until the
    RDD size converges.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    # Sentinels chosen unequal so the loop body runs at least once.
    count = 1
    new_count = 2
    output_rdd = sc.parallelize([(sol, 0)])
    while count != new_count:
        level = level + 1
        output_rdd = output_rdd.flatMap(bfs_map).reduceByKey(bfs_reduce)
        if level % 8 == 0:
            # Repartition every 8 levels to keep keys co-located.
            output_rdd = output_rdd.partitionBy(PARTITION_COUNT)
        count = new_count
        new_count = output_rdd.count()  # forces evaluation each level
    output_rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; stops when the RDD count
    stops growing (Python 2 — note the print statement below).

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level, frontRDD
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    # The solution configuration: BFS explores the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)
    print sol
    max_level = 6  # NOTE(review): never used
    frontRDD = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), level)])
    count = 0
    prevCount = -1
    while 1:
        if count % 8 == 0:
            # Repartition every 8 levels (repartition, not partitionBy, so a
            # shuffle happens even without a partitioner change).
            frontRDD = frontRDD.repartition(PARTITION_COUNT)
        frontRDD = frontRDD.flatMap(bfs_map)
        frontRDD = frontRDD.reduceByKey(bfs_reduce)
        count = count + 1
        level = level + 1
        currCount = frontRDD.count()  # forces evaluation each level
        if currCount > prevCount:
            prevCount = currCount
        else:
            # No growth: the reachable state space is exhausted.
            break
    frontRDD.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; counts every level until the
    RDD size converges.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    # Sentinels chosen unequal so the loop body runs at least once.
    count = 1
    new_count = 2
    output_rdd = sc.parallelize([(sol, 0)])
    while count != new_count:
        level = level + 1
        output_rdd = output_rdd.flatMap(bfs_map).reduceByKey(bfs_reduce)
        if level % 8 == 0:
            # Repartition every 8 levels to keep keys co-located.
            output_rdd = output_rdd.partitionBy(PARTITION_COUNT)
        count = new_count
        new_count = output_rdd.count()  # forces evaluation each level
    output_rdd.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; stops when the RDD size converges.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    # NOTE(review): the global declaration and initialization below duplicate
    # the ones above (Python 2 emits a SyntaxWarning for global-after-assign).
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0  # changes per level but is fixed within one MapReduce job
    # The solution configuration: BFS explores the tree from this node.
    sol = Sliding.solution(WIDTH, HEIGHT)
    soln = sc.parallelize([(Sliding.board_to_hash(WIDTH, HEIGHT, sol), 0)])
    num = 1
    temp = 0
    while (temp != num):
        if (level % 16 == 0):
            # Periodic repartition with the builtin hash as partition function.
            soln = soln.partitionBy(PARTITION_COUNT, hash)
        level = level + 1
        soln = soln.flatMap(bfs_map).reduceByKey(bfs_reduce)
        temp = num
        num = soln.count()  # forces evaluation each level
    soln.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def solve_puzzle(master, output, height, width, slaves):
    """BFS over the sliding-puzzle state graph; applies swap_map before writing.

    master: Spark master URL.
    output: output directory path for saveAsTextFile.
    height, width: puzzle dimensions (stored into module globals).
    slaves: number of workers; used to coalesce before writing.
    """
    global HEIGHT, WIDTH, level
    HEIGHT = height
    WIDTH = width
    level = 0
    sc = SparkContext(master, "python")
    sol = Sliding.board_to_hash(WIDTH, HEIGHT, Sliding.solution(WIDTH, HEIGHT))
    RDD = sc.parallelize([(sol, level)])
    count = RDD.count()
    RDD_count = 0
    search = True
    k = 1
    while search:
        if k % 3 == 0:
            # Every 3rd iteration also repartition (PARTITION_COUNT is 16).
            RDD = RDD.flatMap(bfs_map).partitionBy(
                PARTITION_COUNT).reduceByKey(bfs_reduce)
        else:
            RDD = RDD.flatMap(bfs_map).reduceByKey(bfs_reduce)
        if k % 2 == 0:
            # count() forces evaluation; only check for convergence every 2nd pass.
            RDD_count = RDD.count()
            if RDD_count == count:
                search = False
            count = RDD_count
        k = k + 1
        level = level + 1
    # swap_map presumably flips pairs to (level, hash) for output — defined elsewhere.
    RDD = RDD.map(swap_map)
    RDD.coalesce(slaves).saveAsTextFile(output)
    sc.stop()
def get_solution_hash():
    """Return the integer hash of the solved board configuration.

    NOTE(review): reads lowercase module-level `width`/`height` — presumably
    globals set elsewhere; other helpers in this file use WIDTH/HEIGHT.
    Confirm these names exist at module scope.
    """
    return Sliding.board_to_hash(width, height, Sliding.solution(width, height))
def make_state(level, board):
    """Abstraction for building a state: the board's hash paired with its level."""
    board_hash = Sliding.board_to_hash(WIDTH, HEIGHT, board)
    return (board_hash, level)
def make_state(level, board):
    """Abstraction for building a state: the board's hash paired with its level."""
    key = Sliding.board_to_hash(WIDTH, HEIGHT, board)
    return (key, level)
def board_to_hash(board):
    """Convenience wrapper: hash a board with the module's puzzle dimensions.

    NOTE(review): reads lowercase module-level `width`/`height` — presumably
    globals set elsewhere; other helpers in this file use WIDTH/HEIGHT.
    Confirm these names exist at module scope.
    """
    return Sliding.board_to_hash(width, height, board)