def wait_for_config():
    ''' Polls the store until the master sets the Job config. '''
    logging.info("{0} waiting for config".format(job_id))
    while True:
        config = kv.read_store(kv_conn, job_id + '_config')
        # read_store() returns a bare '\r' while the key is still unset
        if config != '\r':
            return config
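# None of the kv helpers used above are shown in this excerpt. What follows is
# a minimal sketch of what they might look like, assuming a memcached-style
# text protocol; the host, port, and single-recv parsing are assumptions
# inferred from the "STORED\r\n" and '\r'-sentinel checks in this file, not
# the repo's actual client.
import socket

STORE_HOST, STORE_PORT = 'localhost', 11211   # assumed store endpoint


def get_store_connection():
    return socket.create_connection((STORE_HOST, STORE_PORT))


def set_command(conn, key, length, value):
    ''' Stores value under key; the server replies "STORED\r\n" on success. '''
    conn.sendall('set {0} 0 0 {1}\r\n{2}\r\n'.format(key, length, value).encode())
    return conn.recv(4096).decode()


def read_store(conn, key):
    ''' Fetches key, returning '<data>\r', or a bare '\r' if the key is unset. '''
    conn.sendall('get {0}\r\n'.format(key).encode())
    # A real client would loop on recv() until the terminating "END\r\n"
    response = conn.recv(65536).decode()
    if response.startswith('END'):
        return '\r'   # the sentinel the polling loops in this file test against
    # Hit: "VALUE <key> <flags> <bytes>\r\n<data>\r\nEND\r\n" -> "<data>\r"
    body = response.split('\n', 1)[1]
    return body.rsplit('\r\nEND', 1)[0] + '\r'


def close_store_connection(conn):
    conn.close()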
def consolidate_output(reducer_jobids: list, output_file_path: str):
    ''' Consolidates reducer results and writes them to the output file. '''
    # Rewrite each reducer's '\r,'-separated records as newline-separated lines
    reducer_output = [
        kv.read_store(kv_conn, reducer_id + '_result').replace('\r,', '\n')
        for reducer_id in reducer_jobids
    ]
    with open(output_file_path, 'w', encoding="utf8", errors='ignore') as output:
        output.writelines(reducer_output)
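# A worked example of the '\r,' -> '\n' rewrite performed above. The
# 'key,value' record shape is an assumption inferred from this function;
# the actual framing is decided by store_reduce_output() elsewhere.
raw = 'the,12\r,quick,4\r,'
print(raw.replace('\r,', '\n'))
# the,12
# quick,4
# (the trailing '\r,' becomes the final newline, so writelines() needs no extra '\n')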
def wait_for_reducers(reducer_jobids: list):
    ''' Polls the store until every Reducer Job reports DONE. '''
    while True:
        statuses = [
            kv.read_store(kv_conn, reducer_id + '_status')
            for reducer_id in reducer_jobids
        ]
        if all(status == "DONE\r" for status in statuses):
            logging.debug("Reducers Completed - status\n{0}".format(statuses))
            break
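# A hypothetical sketch of how the master might chain the two helpers above
# once the reduce phase is dispatched. finish_job and its arguments are
# illustrative names; only the 'output/output_<jobid>.txt' path (served by
# get_job_status below) and the COMPLETED status value appear elsewhere in
# this excerpt.
def finish_job(job_id, reducer_jobids):
    wait_for_reducers(reducer_jobids)      # block until every reducer is DONE
    consolidate_output(reducer_jobids,
                       'output/output_{0}.txt'.format(job_id))
    # Flipping the status to COMPLETED is what lets get_job_status() start
    # serving the consolidated file instead of a status string
    kv.set_command(kv_conn, job_id + '_status',
                   len("COMPLETED".encode()), "COMPLETED")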
def get_job_status():
    '''
    API route that returns the current status for a given Job ID.
    Returns the Job output file if the Job has completed.
    '''
    conn = None
    try:
        job_id = request.args.get('jobid')
        logging.info("Getting status for job : {0}".format(job_id))
        conn = kv.get_store_connection()
        status = kv.read_store(conn, "{0}_status".format(job_id))
        if status == "COMPLETED\r":
            return send_from_directory('output',
                                       filename='output_{}.txt'.format(job_id))
        return status
    except Exception as e:
        logging.error("Job status check failed : %s", e)
        return "ERROR : Job status check failed"
    finally:
        # conn stays None if get_store_connection() itself raised; the original
        # unconditional close in the except branch would have failed there too
        if conn is not None:
            kv.close_store_connection(conn)
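# Client-side view of the route above, as a sketch. The '/status' path, host,
# and port are assumptions; only the 'jobid' query parameter is visible in the
# handler itself.
import requests

resp = requests.get('http://localhost:5000/status', params={'jobid': 'job42'})
print(resp.text)   # a status string, or the consolidated output once COMPLETED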
def main():
    ''' The driver function that runs the Map job '''
    try:
        if job_id is None:
            logging.critical("Job Initialization Error! ABORTING")
            exit()

        # Step 1 : Wait for Map Job config
        config = json.loads(wait_for_config())
        reducer_node = config['reducer_node']

        # Step 2 : Update status as started
        res = kv.set_command(kv_conn, job_id + '_status',
                             len("STARTED".encode()), "STARTED")
        if res != "STORED\r\n":
            logging.error("Status set failure : %s", res)
            logging.critical("ABORTING")
            exit()

        # Step 3 : Read Mapper Input -- '#\r#'-framed entries, consumed
        # pairwise below
        message = kv.read_store(kv_conn, job_id + '_input')
        message_list = message.split('#\r#')[1:]
        map_result = []

        # Step 4 : Run the map function on each pair of input entries
        map_fn_serialized = bytes(config['map_fn'])
        for i in range(0, len(message_list), 2):
            map_result.extend(
                run_map(map_fn_serialized, message_list[i], message_list[i + 1]))

        # Step 5 : Partition Map results, one partition per reducer
        partition_map = partition_intermediate_results(map_result,
                                                       len(reducer_node),
                                                       reducer_node)

        # Step 6 : Store Map results
        store_intermediate_results(partition_map)
    except Exception as e:
        logging.critical("JOB FAILED : %s", e)
        res = kv.set_command(kv_conn, job_id + '_status',
                             len("FAILED".encode()), "FAILED")
    finally:
        kv.close_store_connection(kv_conn)
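# run_map and partition_intermediate_results are defined elsewhere in the
# repo; these sketches only illustrate the likely shape of Steps 4 and 5.
# The pickle encoding of map_fn and the (key, value) pair output are
# assumptions inferred from bytes(config['map_fn']) above.
import pickle
import zlib


def run_map(map_fn_serialized, name, contents):
    map_fn = pickle.loads(map_fn_serialized)   # assumed: master pickled the fn
    return map_fn(name, contents)              # assumed: returns [(key, value), ...]


def partition_intermediate_results(map_result, n_partitions, reducer_node):
    ''' Hash-partitions (key, value) pairs so equal keys land on one reducer. '''
    partitions = {node: [] for node in reducer_node}
    for key, value in map_result:
        # crc32 is stable across processes, unlike the built-in hash(), so
        # every mapper routes a given key to the same reducer
        node = reducer_node[zlib.crc32(key.encode()) % n_partitions]
        partitions[node].append((key, value))
    return partitions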
def main():
    ''' The driver function that runs the Reduce job '''
    try:
        if job_id is None:
            logging.critical("Job Initialization Error! ABORTING")
            exit()

        # Step 1 : Wait for Reduce Job config
        config = json.loads(wait_for_config())
        partition_key = config['partition_key']

        # Step 2 : Update status as started
        res = kv.set_command(kv_conn, job_id + '_status',
                             len("STARTED".encode()), "STARTED")
        if res != "STORED\r\n":
            logging.error("Status set failure : %s", res)
            logging.critical("ABORTING")
            exit()

        # Step 3 : Read this reducer's partition data from the mappers
        message = kv.read_store(kv_conn, partition_key)

        # Step 4 : Sort and group the intermediate data by key
        sorted_results = sort_intermediate_results(message)
        reduce_output = []

        # Step 5 : Run Reduce once per key over that key's values
        reduce_fn_serialized = bytes(config['reduce_fn'])
        for key in sorted_results:
            output = run_reduce(reduce_fn_serialized, key, sorted_results[key])
            reduce_output.append(output)

        # Step 6 : Store reduce results
        store_reduce_output(reduce_output)
    except Exception as e:
        logging.critical("JOB FAILED : %s", e)
        res = kv.set_command(kv_conn, job_id + '_status',
                             len("FAILED".encode()), "FAILED")
    finally:
        kv.close_store_connection(kv_conn)
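# sort_intermediate_results is defined elsewhere; this sketch shows the
# shuffle half of the reduce phase under an assumed record framing
# ('key,value' records separated by '\r,', mirroring the separator that
# consolidate_output() rewrites above).
from collections import defaultdict


def sort_intermediate_results(message):
    ''' Groups 'key,value' records by key and returns them in sorted key order. '''
    grouped = defaultdict(list)
    for record in message.split('\r,'):
        if not record.strip():
            continue
        key, _, value = record.partition(',')
        grouped[key].append(value)
    return dict(sorted(grouped.items()))   # key -> [values], in key order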