def execute_job(self, socket, message):
    """Launch a ``JobTracker`` for the requested job in a child process.

    When ``self.current_job_cap`` is 0 an ``ERROR`` message
    (``WorkerBusyError``) is sent on ``socket`` and nothing is started.
    Otherwise one unit of job capacity is taken, the tracker is started and
    an ``OK`` message carrying the tracker's ping address is sent back.

    :param socket: socket the request arrived on; exactly one reply is sent
    :param message: request whose ``payload`` dict is expanded as keyword
        arguments of ``JobTracker``
    :return: None
    """
    if self.current_job_cap == 0:
        print("Worker: Ups! NO capacity for more tasks")
        answer = mt.Message().set_message('ERROR', {
            'info': "Worker busy",
            'type_error': 'WorkerBusyError'
        })
        socket.send_string(str(answer))
        return None

    self.current_job_cap -= 1
    arguments = message.payload
    tracker_ip = self.current_worker_addr[0]
    tracker_addr_ping = (tracker_ip, mt.get_available_random_port())
    tracker = JobTracker(self.worker_broadcast_addr,
                         self.filesystem_broadcast_addr, tracker_addr_ping,
                         tracker_ip, self.current_worker_addr, **arguments)
    # The bound method is used as the target instead of a local lambda:
    # a lambda defined inside a function cannot be pickled, so the process
    # could not be started under the "spawn" start method.
    procces = Process(target=tracker.execute_job, name='worker_procces_job')
    procces.start()
    print('Worker: ', "submited job")
    answer = mt.Message().set_message(
        'OK', {
            "ping_tracker_ip": tracker_addr_ping[0],
            "ping_tracker_port": tracker_addr_ping[1]
        })
    socket.send_string(str(answer))
    print('Worker: ', "This is my current job capacity: ",
          self.current_job_cap)
def insert_block_result(self, file_sys_addr, block_id, result_urls):
    """Attach the result URLs to ``block_id`` in the status database.

    :param file_sys_addr: address of the filesystem node
    :param block_id: id of the block in the block table
    :param result_urls: list with the URLs of the block's results
    :return: 0 on success; when ``send_status_command`` reports -1, the
        value returned by ``_reset_method_if_no_answer_from_fs``
    """
    db_header = mt.Message()
    db_header.set_message('BLOCKRESULT',
                          {'status_db_url': self.status_db_url})
    command_payload = {'block_id': block_id, 'result_url': result_urls}
    command = mt.Message().set_message('BLOCKRESULT', command_payload)

    outcome = self.send_status_command(file_sys_addr, db_header, command)
    if outcome != -1:
        print("Insertado el resultado del bloque: ", block_id)
        return 0

    # No usable answer from the filesystem: hand this same call over to
    # the shared reset helper.
    return self._reset_method_if_no_answer_from_fs(
        self.insert_block_result, file_sys_addr, block_id, result_urls)
def update_status_row(self, file_sys_addr, table_name, key_row, row_updates):
    """Update one row of ``table_name`` in the status database.

    :param file_sys_addr: address of the filesystem node
    :param table_name: name of the table to update
    :param key_row: ``(key_name, value)`` pair identifying the row
    :param row_updates: list of ``(field_name, new_value)`` tuples
    :return: 0 on success; when ``send_status_command`` reports -1, the
        value returned by ``_reset_method_if_no_answer_from_fs``
    """
    db_header = mt.Message()
    db_header.set_message('UPDATEROW', {'status_db_url': self.status_db_url})
    update_payload = {
        'table': table_name,
        'key': key_row,
        'fields_values': row_updates
    }
    command = mt.Message().set_message('UPDATEROW', update_payload)

    outcome = self.send_status_command(file_sys_addr, db_header, command)
    if outcome != -1:
        return 0

    # The filesystem did not answer: delegate to the shared reset helper.
    return self._reset_method_if_no_answer_from_fs(
        self.update_status_row, file_sys_addr, table_name, key_row,
        row_updates)
def _answering_pings(self):
    """Answer every message received on ``self.tracker_addr_ping`` with ``OK``.

    Binds a REP socket and loops forever; it never returns.
    """
    context = zmq.Context()
    ping_socket = context.socket(zmq.REP)
    endpoint = 'tcp://{}:{}'.format(*self.tracker_addr_ping)
    ping_socket.bind(endpoint)
    while True:
        raw_ping = ping_socket.recv_string()
        ping = mt.Message().get_message(raw_ping)
        print('JobTracker: ', ' me llego este ping: ', ping)
        ok_reply = mt.Message().set_message("OK")
        ping_socket.send_string(str(ok_reply))
def _answering_pings(self):
    """Answer every message received on ``self.task_ping_addr`` with ``OK``.

    Binds a REP socket and loops forever; it never returns.
    """
    import zmq
    context = zmq.Context()
    ping_socket = context.socket(zmq.REP)
    endpoint = 'tcp://{}:{}'.format(*self.task_ping_addr)
    ping_socket.bind(endpoint)
    while True:
        raw_ping = ping_socket.recv_string()
        ping = mt.Message().get_message(raw_ping)
        print('Task_Exc: ', ' me llego este ping: ', ping)
        ok_reply = mt.Message().set_message("OK")
        ping_socket.send_string(str(ok_reply))
def get_line_by_line_str(self,
                         file_sys_addr,
                         job_phase,
                         data_file_name='map_data',
                         data_url=None,
                         len_limit=500):
    """Fetch a text file from the filesystem in slices and store each slice.

    The file is requested with ``GETDATA`` (``byte`` False). Each slice
    returned by ``_get_lines`` is passed to ``_save_split_get_url`` and the
    value that call returns is collected.

    :param file_sys_addr: address of the filesystem node
    :param job_phase: forwarded to ``_save_split_get_url``
    :param data_file_name: file name under ``self.job_url``; used only when
        ``data_url`` is None
    :param data_url: full URL of the file; overrides ``data_file_name``
    :param len_limit: forwarded to ``_get_lines``
    :return: ``(slices_url, False)``; when the filesystem does not answer,
        the value returned by ``_reset_method_if_no_answer_from_fs``
    """

    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.get_line_by_line_str, file_sys_addr, job_phase,
            data_file_name, data_url, len_limit)

    if data_url is not None:
        file_url = data_url
    else:
        file_url = "{}/{}".format(self.job_url, data_file_name)

    request = mt.Message()
    request.set_message("GETDATA", {'file_url': file_url, 'byte': False})
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, request,
                          file_sys_addr,
                          lambda: print("File system didn't respond"))
    if self._verify_if_errors_in_fs(answer) == -1:
        return start_over()

    if answer.message_name == 'OK':
        new_addr = (answer.payload['ip'], answer.payload['port'])
        ack = mt.Message()
        ack.set_message("OK")
        slices_url = []
        index = 0
        # At least one fetch is always made; further fetches follow until
        # _get_lines reports end of file.
        while True:
            chunk, eof = self._get_lines(ack, new_addr, len_limit)
            if chunk == -1:
                return start_over()
            slices_url.append(
                self._save_split_get_url(file_sys_addr, index, job_phase,
                                         chunk))
            if eof:
                break
            index += 1
        is_byte = False
        return slices_url, is_byte
def get_status_rows(self,
                    file_sys_addr,
                    table_name,
                    filters=None,
                    catch_error=True):
    """Fetch rows of ``table_name`` from the status database.

    The rows are requested from ``self.status_db_url`` on the filesystem
    with a ``GETROWS`` command.

    :param file_sys_addr: address of the filesystem node
    :param table_name: name of the table to read
    :param filters: list of ``(field_name, value)`` tuples, i.e.
        ``where field_name == value``; ``None`` (the default) means no filter
    :param catch_error: when True the answer is checked with
        ``_verify_if_errors_in_fs``; when False an ``ERROR`` answer makes
        this method return -1
    :return: the ``rows`` entry of the answer payload; -1 for an ``ERROR``
        answer when ``catch_error`` is False; or the value returned by
        ``_reset_method_if_no_answer_from_fs``
    """
    # A fresh list per call replaces the former shared mutable default.
    if filters is None:
        filters = []

    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.get_status_rows, file_sys_addr, table_name, filters,
            catch_error)

    first_message = mt.Message()
    first_message.set_message('GETROWS',
                              {'status_db_url': self.status_db_url})
    second_message = mt.Message().set_message('GETROWS', {
        'table': table_name,
        'filters': filters
    })
    answer = self.send_status_command(file_sys_addr, first_message,
                                      second_message, True)

    if answer == -1:
        return start_over()
    if catch_error:
        if self._verify_if_errors_in_fs(answer, catch_error) == -1:
            return start_over()
    elif isinstance(answer, mt.Message) and answer.message_name == 'ERROR':
        return -1

    return answer.payload['rows']
def start_server(self):
    """Serve worker commands received on ``self.current_worker_addr``.

    Binds a REP socket, registers the worker, refreshes the filesystem node
    list and then loops forever, calling the handler stored in
    ``self.names`` under each incoming message's name.

    :return: -1 when ``update_filesystem_nodes`` returns -1; otherwise the
        loop never returns
    """
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind('tcp://{}:{}'.format(*self.current_worker_addr))
    print('Worker: ', "Running on {}".format(self.current_worker_addr))

    # TODO: try not to start listening until the worker is registered.
    self.register_worker()
    if self.update_filesystem_nodes() == -1:
        return -1
    print('Worker: Actualizamos los nodos del filesystem: ',
          self.file_sys_addrs)

    while True:
        print(
            'Worker: ', "Esperando por comando el worker: {}:{}".format(
                *self.current_worker_addr))
        text = socket.recv_string()
        print('Worker: ', 'Me llego un mensaje: ', text)
        message = mt.Message()
        message.get_message(text)
        # NOTE(review): an unknown message name raises KeyError here.
        handler = self.names[message.message_name]
        handler(socket, message)
def get_pyobj_data(self,
                   file_sys_addr,
                   data_file_name='map_data',
                   data_url=None):
    """Fetch a file from the filesystem in byte mode and return its object.

    The file is requested with ``GETDATA`` (``byte`` True) and read through
    ``_get_complete_object``.

    :param file_sys_addr: address of the filesystem node
    :param data_file_name: file name under ``self.job_url``; used only when
        ``data_url`` is None
    :param data_url: full URL of the file; overrides ``data_file_name``
    :return: the ``(data, is_byte)`` pair from ``_get_complete_object``, or
        the value returned by ``_reset_method_if_no_answer_from_fs``
    """

    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.get_pyobj_data, file_sys_addr, data_file_name, data_url)

    if data_url is not None:
        file_url = data_url
    else:
        file_url = "{}/{}".format(self.job_url, data_file_name)

    request = mt.Message()
    request.set_message("GETDATA", {'file_url': file_url, 'byte': True})
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, request,
                          file_sys_addr,
                          lambda: print("File system didn't respond"))

    status = self._verify_if_errors_in_fs(answer)
    if status == -1:
        return start_over()
    if status == 1:
        # Status 1 is retried after a short pause with the same arguments.
        time.sleep(0.5)
        print("Intentamos de nuevo get_pyobj_data")
        return self.get_pyobj_data(file_sys_addr, data_file_name, data_url)

    if answer.message_name == 'OK':
        fetched = self._get_complete_object(answer, b'',
                                            mt.send_message_recv_byte)
        if fetched == -1:
            return start_over()
        data, is_byte = fetched
        return data, is_byte
def answer_status(self, socket, message):
    """Reply on ``socket`` with the current task and job capacities.

    :param socket: socket on which the ``OK`` reply is sent
    :param message: the incoming request (not read)
    """
    reply = mt.Message()
    reply = reply.set_message(
        "OK", {
            'current_task_cap': self.current_task_cap,
            'current_job_cap': self.current_job_cap
        })
    serialized = str(reply)
    socket.send_string(serialized)
def send_error_message_to_addr(self, payload, addr):
    """Send an ``ERROR`` message to ``addr`` and report this job as done.

    :param payload: payload of the ``ERROR`` message
    :param addr: address the error is sent to
    """

    def on_no_answer():
        print('JobTracker: ', 'No me respondio: ', addr)

    error_message = mt.Message().set_message('ERROR', payload)
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message,
                         error_message, addr, on_no_answer)
    if reply is not None:
        print('JobTracker: ', 'Me respondio: ', addr)
    self.send_done_to_current_worker(self.current_worker_addr, 'job')
def save_key_file(self, file_sys_addr, data_url, key_value_pair):
    """Ask the filesystem to append ``key_value_pair`` to ``data_url``.

    :param file_sys_addr: address of the filesystem node
    :param data_url: URL of the file to append to
    :param key_value_pair: ``(key, [value])`` pair; the value travels in a
        list so the filesystem can concatenate lists with ``+=``
    :return: 0 on success; -1 when the first answer check reports -1; or the
        value returned by ``_reset_method_if_no_answer_from_fs``
    """
    # Intended protocol, per the original author's notes: APPENDDATA with
    # payload {file_url, byte: True}; the filesystem unpickles what it
    # receives, takes a lock, loads the pickled file content, extends the
    # stored value list with the received one and dumps it back.
    request = mt.Message()
    request.set_message("APPENDDATA", {'file_url': data_url, 'byte': True})
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, request,
                          file_sys_addr,
                          lambda: print("No me contesta el File Sys"))

    status = self._verify_if_errors_in_fs(answer)
    if status == -1:
        return -1
    if status == 1:
        # The log lines below show status 1 is treated as "file is locked".
        print('Me dijeron que esta puesto un lock en :', data_url)
        print("Intentamos de nuevo hacer appenddata a: ", data_url)
        time.sleep(0.5)
        return self.save_key_file(file_sys_addr, data_url, key_value_pair)

    if answer.message_name == 'OK':
        transfer_addr = (answer.payload['ip'], answer.payload['port'])
        answer = mt.loop_tool(mt.try_send_byte_data, mt.send_byte_data,
                              key_value_pair, transfer_addr,
                              lambda: print(" file sys did't respond"))
        if self._verify_if_errors_in_fs(answer) == -1:
            return self._reset_method_if_no_answer_from_fs(
                self.save_key_file, file_sys_addr, data_url, key_value_pair)
    return 0
def send_done_to_current_worker(self, current_worker_addr, role):
    """Tell this process's own worker that the work is finished, then exit.

    :param current_worker_addr: address of the worker that is notified
    :param role: value sent under ``'role'`` in the ``DONE`` payload
    """

    def on_no_answer():
        print('Task_Exc: ', 'No me respondio mi propio worker')

    print('Task_Exc: ', 'Mandamos DONE al propio worker')
    done_message = mt.Message().set_message('DONE', {'role': role})
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message,
                         done_message, current_worker_addr, on_no_answer)
    if reply is not None:
        print('Task_Exc: ', 'Termine')
    exit()
def send_done_to_current_worker(self, current_worker_addr, role):
    """Tell this tracker's own worker that the job is finished, then exit.

    After notifying the worker, the ping process is terminated and joined
    before this process exits.

    :param current_worker_addr: address of the worker that is notified
    :param role: value sent under ``'role'`` in the ``DONE`` payload
    """
    print('JobTracker: ', 'Mandamos DONE al propio worker')
    message = mt.Message().set_message('DONE', {'role': role})
    answer = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, message,
        current_worker_addr,
        lambda: print('JobTracker: ', 'No me respondio mi propio worker'))
    if answer is not None:
        print('JobTracker: ', 'Termine')
    # Terminate before joining: joining a child that never finishes on its
    # own blocks forever and exit() below is never reached. The log line
    # already announces the terminate.
    # NOTE(review): assumes pinging_process is a multiprocessing.Process
    # (presumably the one running _answering_pings) - confirm.
    self.pinging_process.terminate()
    print("JobTracker: ", "Le hice terminate al pinging process")
    self.pinging_process.join()
    exit()
def free_capacity(self, socket, message):
    """Give back one unit of task or job capacity and acknowledge with ``OK``.

    A payload ``role`` of ``'task'`` releases task capacity; any other role
    releases job capacity. Neither counter is raised once it has reached
    ``self.limit_task``.

    :param socket: socket on which the ``OK`` reply is sent
    :param message: request whose payload carries ``'role'``
    """
    print('Worker: ', "Let's free capacity!!!: ")
    releasing_task = message.payload['role'] == 'task'
    if releasing_task and self.current_task_cap < self.limit_task:
        self.current_task_cap += 1
    # NOTE(review): job capacity is capped with limit_task as well; this
    # looks like it may have been meant to use a job-specific limit -
    # confirm.
    elif not releasing_task and self.current_job_cap < self.limit_task:
        self.current_job_cap += 1

    print('Worker: ', "This is my current task capacity: ",
          self.current_task_cap)
    print('Worker: ', "This is my current job capacity: ",
          self.current_job_cap)
    ok_reply = mt.Message().set_message("OK")
    socket.send_string(str(ok_reply))
def send_result(self, result_url):
    """Send the final result URL to the client and report the job as done.

    :param result_url: URL sent under ``'result_url'`` in the ``DONE`` payload
    :return: 0
    """

    def on_no_answer():
        print('JobTracker: ', "Can't send results to client",
              self.client_addr)

    print("JobTracker: ", "Intentemos mandarle el resultado_url al cliente")
    done_message = mt.Message().set_message("DONE",
                                            {'result_url': result_url})
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message,
                         done_message, self.client_addr, on_no_answer)
    client_confirmed = reply is not None and reply.message_name == 'OK'
    if client_confirmed:
        print('JobTracker: ', "EL cliente recibio la url del resultado final")
    self.send_done_to_current_worker(self.current_worker_addr, 'job')
    return 0
def insert_status_block(self, file_sys_addr, block_id, slices_urls,
                        job_phase):
    """Insert a new block row into the block table of the status database.

    The row is stored under the id ``'<job_phase>_<block_id>'`` with state
    ``mt.slices_states[0]`` and the given slice URLs.

    :param file_sys_addr: address of the filesystem node
    :param block_id: number of the block within the split of the input
    :param slices_urls: list with the URLs of the block's slices
    :param job_phase: ``'map'`` or ``'reduce'``
    :return: 0 on success; when ``send_status_command`` reports -1, the
        value returned by ``_reset_method_if_no_answer_from_fs``
    """
    first_message = mt.Message()
    first_message.set_message('INSERTBLOCK',
                              {'status_db_url': self.status_db_url})
    # The prefixed id gets its own name so the untouched block_id can be
    # passed to the reset helper; reusing block_id for both meant the value
    # handed over already carried the phase prefix and would be prefixed
    # again.
    status_block_id = '{}_{}'.format(job_phase, block_id)
    print("Vamos a insertar el bloque: ", status_block_id)
    second_message = mt.Message().set_message(
        'INSERTBLOCK', {
            'block_id': status_block_id,
            'state': mt.slices_states[0],
            'slices_id': slices_urls,
            'phase': job_phase
        })
    result_ = self.send_status_command(file_sys_addr, first_message,
                                       second_message)
    if result_ == -1:
        return self._reset_method_if_no_answer_from_fs(
            self.insert_status_block, file_sys_addr, block_id, slices_urls,
            job_phase)
    return 0
def insert_job_status(self, file_sys_addr, payload):
    """Insert the job described by ``payload`` into the job table.

    :param file_sys_addr: address of the filesystem node
    :param payload: dict with the fields ``status_db_url`` (URL of the db),
        ``job_id`` (the key), ``tracker_ip_ping``, ``tracker_port_ping``,
        ``answer_ip``, ``answer_port``, ``status_phase`` (getworkers, slice,
        waitforresults, ...), ``map_data_url``, ``result_url``,
        ``job_state`` (map or reduce) and ``data_type``
    :return: 0 on success; when ``send_status_command`` reports -1, the
        value returned by ``_reset_method_if_no_answer_from_fs``
    """
    db_header = mt.Message()
    db_header.set_message('INSERTJOB', {'status_db_url': self.status_db_url})
    command = mt.Message().set_message('INSERTJOB', payload)

    outcome = self.send_status_command(file_sys_addr, db_header, command)
    if outcome != -1:
        print("Inserted Job")
        return 0

    # The filesystem did not answer: delegate to the shared reset helper.
    return self._reset_method_if_no_answer_from_fs(self.insert_job_status,
                                                   file_sys_addr, payload)
def send_finished_task(self):
    """Report the finished block to the tracker, then release the worker.

    A ``DONE`` message with this worker's address and ``self.block_id`` is
    sent to ``self.answer_addr``; afterwards
    ``send_done_to_current_worker`` is called with role ``'task'``.
    """

    def on_no_answer():
        print('Task_Exc: ', 'No pude mandarle Done al tracker')

    done_payload = {
        'worker_addr': self.current_worker_addr,
        'block_id': self.block_id
    }
    print('Task_Exc: ', 'Voy a mandarle Done al tracker por: ',
          self.answer_addr)
    done_message = mt.Message().set_message("DONE", done_payload)
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message,
                         done_message, self.answer_addr, on_no_answer)
    print('Task_Exc: ', 'Esta fue la respuesta del JobTracker ', reply)
    if reply is None:
        print('Task_Exc: ', "No me respondieron")
    self.send_done_to_current_worker(self.current_worker_addr, 'task')
def save_block(self, file_sys_addr, data_urls, pairs, status_db_url,
               block_id):
    """Send a whole block of pairs to the filesystem with ``APPENDBLOCK``.

    A first message announces the block (size, status db URL, block id);
    the answer carries the address to which the second message, holding
    ``data_urls`` and ``pairs``, is sent.

    :param file_sys_addr: address of the filesystem node
    :param data_urls: list of file URLs; its length is sent as block size
    :param pairs: sent alongside ``data_urls`` in the second message
    :param status_db_url: URL of the status database
    :param block_id: id of the block being appended
    :return: 0 when the final answer is ``OK``; otherwise the value returned
        by ``_reset_method_if_no_answer_from_fs``
    """

    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.save_block, file_sys_addr, data_urls, pairs, status_db_url,
            block_id)

    def on_no_answer():
        print("No me contesta el File Sys")

    announcement = {
        'block_size': len(data_urls),
        'status_db_url': status_db_url,
        'block_id': block_id
    }
    message = mt.Message()
    message.set_message("APPENDBLOCK", announcement)
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, message,
                          file_sys_addr, on_no_answer)
    if self._verify_if_errors_in_fs(answer) == -1:
        return start_over()

    transfer_addr = (answer.payload['ip'], answer.payload['port'])
    # The same Message object is reused for the data-carrying message.
    message.set_message("APPENDBLOCK", {
        'data_urls': data_urls,
        'pairs': pairs
    })
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, message,
                          transfer_addr, on_no_answer)
    if self._verify_if_errors_in_fs(answer) == -1:
        return start_over()

    confirmed = isinstance(answer,
                           mt.Message) and answer.message_name == 'OK'
    return 0 if confirmed else start_over()
def _send_single_task_message(self, block_urls, put_byte, worker, block_id):
    """Send one ``TASK`` message for a block to ``worker``.

    :param block_urls: URLs of the block, sent under ``'block_urls'``
    :param put_byte: sent under ``'load_byte'``
    :param worker: address of the worker that receives the task
    :param block_id: id of the block, sent under ``'block_id'``
    :return: ``(answer, block_urls)`` where ``answer`` is what
        ``mt.loop_tool`` returned
    """

    def on_no_answer():
        print('JobTracker: ', "No me respondio el worker ", worker)

    task_message = mt.Message()
    task_message.set_message(
        "TASK", {
            'job_url': self.job_url,
            'job_id': self.job_id,
            'function_url': self.functions_url,
            'block_urls': block_urls,
            'task': self.job_phase,
            'answer_addr': self.tracker_addr,
            'load_byte': put_byte,
            'status_db_url': self.status_db_url,
            'block_id': block_id
        })
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message,
                         task_message, worker, on_no_answer)
    return reply, block_urls
def save_split_byte_data(self, file_sys_addr, data_url, data):
    """Store ``data`` at ``data_url`` on the filesystem in byte mode.

    A ``PUTDATA`` request (``byte`` True) yields the address to which the
    data and then an empty ``b''`` are sent.

    :param file_sys_addr: address of the filesystem node
    :param data_url: URL of the file to write
    :param data: object handed to ``mt.send_byte_data``
    :return: 0, or the value returned by
        ``_reset_method_if_no_answer_from_fs``
    """

    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.save_split_byte_data, file_sys_addr, data_url, data)

    def on_transfer_silence():
        print("file sys did't respond")

    request = mt.Message()
    request.set_message("PUTDATA", {
        'file_url': data_url,
        'byte': True,
    })
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, request,
                          file_sys_addr,
                          lambda: print("No me contesta el File Sys"))

    status = self._verify_if_errors_in_fs(answer)
    if status == -1:
        return start_over()
    if status == 1:
        time.sleep(0.5)
        print("Intentamos de nuevo save_split_byte_data")
        return self.save_split_byte_data(file_sys_addr, data_url, data)

    if answer.message_name != 'OK':
        return 0

    transfer_addr = (answer.payload['ip'], answer.payload['port'])
    answer = mt.loop_tool(mt.try_send_byte_data, mt.send_byte_data, data,
                          transfer_addr, on_transfer_silence)
    if self._verify_if_errors_in_fs(answer) == -1:
        return start_over()

    # Closing empty payload; its answer is not inspected.
    mt.loop_tool(mt.try_to_send_message, mt.send_byte_data, b'',
                 transfer_addr, on_transfer_silence)
    return 0
def save_split_str_data(self, file_sys_addr, slice_url, data):
    """Store the text ``data`` at ``slice_url`` on the filesystem.

    A ``PUTDATA`` request (``byte`` False) yields the address to which the
    data and then an empty string are sent.

    :param file_sys_addr: address of the filesystem node
    :param slice_url: URL of the file to write
    :param data: text handed to ``mt.send_message``
    :return: 0 on success; -1 when the first answer is not ``OK``; or the
        value returned by ``_reset_method_if_no_answer_from_fs``
    """

    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.save_split_str_data, file_sys_addr, slice_url, data)

    message = mt.Message()
    message.set_message("PUTDATA", {'file_url': slice_url, 'byte': False})
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, message,
                          file_sys_addr,
                          lambda: print("No me contesta el File Sys"))

    if_none = self._verify_if_errors_in_fs(answer)
    if if_none == -1:
        return start_over()
    if if_none == 1:
        time.sleep(0.5)
        print("Intentamos de nuevo save_split_str_data")
        return self.save_split_str_data(file_sys_addr, slice_url, data)

    if answer.message_name != 'OK':
        return -1

    new_addr = (answer.payload['ip'], answer.payload['port'])
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, data,
                          new_addr, lambda: print("file sys did't respond"))
    if self._verify_if_errors_in_fs(answer) == -1:
        return start_over()

    # Closing empty string; its answer is not inspected.
    mt.loop_tool(mt.try_to_send_message, mt.send_message, '', new_addr,
                 lambda: print("file sys did't respond"))
    # Explicit success code, matching save_split_byte_data; this path used
    # to fall off the end and return None while failure returned -1.
    return 0
def _get_complete_object(self, answer, end_character,
                         send_message_function):
    """Read a whole object, chunk by chunk, from the address in ``answer``.

    Chunks are fetched with ``_get_lines`` until it reports end of file and
    are concatenated, each one preceded by ``end_character``.

    :param answer: message whose payload carries ``'ip'`` and ``'port'``
    :param end_character: seed and separator of the accumulation; ``b''``
        selects byte mode
    :param send_message_function: forwarded to ``_get_lines`` as
        ``send_message_func``
    :return: ``(unpickled_object, True)`` when ``end_character == b''``;
        ``(accumulated_data, False)`` otherwise; -1 as soon as
        ``_get_lines`` returns -1
    """
    source_addr = (answer.payload['ip'], answer.payload['port'])
    ack = mt.Message()
    ack.set_message("OK")

    collected = end_character
    while True:
        chunk, eof = self._get_lines(ack,
                                     source_addr,
                                     empty_chac=end_character,
                                     send_message_func=send_message_function)
        if chunk == -1:
            return chunk
        collected += end_character + chunk
        if eof:
            break

    if end_character == b'':
        # NOTE(review): pickle.loads runs on bytes received over the
        # network; it is only safe if the peer is trusted.
        return pickle.loads(collected), True
    return collected, False
def wait_for_results(self, assign_tasks, workers_addrs, put_byte):
    """Wait for the workers' ``DONE`` messages, reassigning orphaned blocks.

    A REP socket is bound on ``self.tracker_addr`` (or, when that bind
    fails, on a random port of ``self.tracker_ip``). Whenever
    ``_try_to_recv_done`` yields nothing, the block table is queried for
    the submitted and writing blocks of the current phase. Every worker
    holding a submitted block is pinged, and blocks whose worker does not
    answer are sent out again through ``send_tasks_messages``.

    :param assign_tasks: mapping updated in place with the reassigned tasks
    :param workers_addrs: worker addresses, used for logging only
    :param put_byte: forwarded to ``send_tasks_messages``
    :return: 0 when no block of the current phase is submitted or writing,
        or when ``_get_blocks_urls_with_blocks_rows`` returns 0
    """
    # Was ``zmqa.Context()``: a typo that raised NameError on entry.
    context = zmq.Context()
    answer_socket = context.socket(zmq.REP)
    # try/finally closes the socket on every exit path; previously only one
    # of the two ``return 0`` paths closed it.
    try:
        try:
            answer_socket.bind('tcp://{}:{}'.format(*self.tracker_addr))
        except zmq.error.ZMQError:
            print("JobTracker: ZMQERROR in wait_for_results")
            # NOTE(review): self.tracker_addr is not updated to the
            # fallback port, so workers may still answer on the old one.
            answer_socket.bind('tcp://{}:{}'.format(
                self.tracker_ip,
                mt.get_available_random_port(self.tracker_ip)))
        print('JobTracker: ',
              "Ahora el tracker espera por respuestas DONE por: ",
              self.tracker_addr)
        print("JobTracker: ", 'Esperando Done de: ', workers_addrs)

        while True:
            ans = mt.loop_tool(self._try_to_recv_done, answer_socket)
            if ans is None:
                submitted_filters = [('state', mt.slices_states[1]),
                                     ('phase', self.job_phase)]
                writing_filters = [('state', mt.slices_states[-2]),
                                   ('phase', self.job_phase)]
                print(
                    "JobTracker: Buscamos los submitted y los writing blocks"
                )
                submitted_blocks_rows = self.status_handler.get_status_rows(
                    self.file_sys_addr, 'block', submitted_filters)
                writing_blocks_rows = self.status_handler.get_status_rows(
                    self.file_sys_addr, 'block', writing_filters)
                print("JobTracker: ",
                      "Ya recibimos los bloques de submitted y writing")

                if len(submitted_blocks_rows) == 0:
                    if len(writing_blocks_rows) == 0:
                        print("JobTracker: ",
                              'Ya todos los bloques estan en DONE')
                        return 0
                    continue

                testing_blocks = self._get_blocks_urls_with_blocks_rows(
                    submitted_blocks_rows)
                if testing_blocks == 0:
                    return 0

                # Indexes of the blocks whose worker answered the ping.
                alive_indexes = set()
                for i, _block in enumerate(testing_blocks):
                    row = submitted_blocks_rows[i]
                    block_id = row['block_id']
                    worker_assigned_ping = (row['worker_ip'],
                                            row['worker_port'])
                    # TODO: find out why a KeyError shows up here even
                    # when no worker has gone down.
                    print("JobTracker: ",
                          "ESTE ES EL WORKER ASSIGN DE BLOCK_ID ", block_id,
                          " worker: ", worker_assigned_ping)
                    print("JobTracker: ", 'Vamos a hacerle ping a :',
                          worker_assigned_ping, ' con addr ping: ',
                          worker_assigned_ping)
                    answer = mt.loop_tool(
                        mt.try_to_send_message, mt.send_message,
                        mt.Message().set_message("PING"),
                        worker_assigned_ping,
                        lambda: print("JobTracker: ", 'No me respondio: ',
                                      worker_assigned_ping), 1)
                    if answer is not None:
                        print(
                            "JobTracker: Ya se que este worker sigue pinchando: ",
                            worker_assigned_ping)
                        alive_indexes.add(i)
                    else:
                        print("JobTracker: Parece que murio este worker: ",
                              worker_assigned_ping)
                        print("JobTracker: ", 'buscamos otro worker')

                print("JobTracker: ",
                      'Ya le hicimo ping a todos, pacemos a reasignar')
                testing_blocks = [
                    block for i, block in enumerate(testing_blocks)
                    if i not in alive_indexes
                ]
                print(
                    "JobTracker: ",
                    'ESTOS SON LOS BLOQUES QUE QUEDAN(MOD) En ESTADO SUBMITTED: ',
                    testing_blocks, ' QUE NO ME RESPONDEN')
                if len(testing_blocks) == 0:
                    continue

                workers_addrs, workers_status = self._getting_workers()
                assign_tasks2 = self.send_tasks_messages(
                    testing_blocks,
                    put_byte,
                    workers_status,
                    registered_block=True)
                for x, y in assign_tasks2.items():
                    assign_tasks[x] = y
                continue

            answer = mt.Message().get_message(ans)
            if answer.message_name == 'ERROR':
                print("JobTracker: ", "Error por parte del worker: ",
                      answer.payload['info'])
                # A REP socket must answer each request before it can
                # receive again; without this reply the tracker could not
                # receive any further DONE.
                answer_socket.send_string(
                    str(mt.Message().set_message("OK")))
            elif answer.message_name == "DONE":
                worker_addr = answer.payload['worker_addr']
                worker_addr = (worker_addr[0], worker_addr[1])
                print('JobTracker: ',
                      "Recibi un Done de: {}:{}".format(*worker_addr))
                message = mt.Message().set_message("OK")
                print("JobTracker: ", "Sending OK to: ", worker_addr)
                answer_socket.send_string(str(message))
    finally:
        answer_socket.close()
def _check_if_exists_job(self):
    """Resume, redirect or create the job depending on its status row.

    * No row for the job (or the status query failed): the job is inserted.
    * The row's ``status_phase`` is ``mt.task_phases[-1]``: the stored
      result URL is sent to the client.
    * Otherwise the tracker recorded in the row is pinged. If it answers
      ``OK`` the client is sent a ``RELOCATE`` message with that tracker's
      addresses and this tracker reports itself done. If it does not, this
      tracker registers its own ping address in the row, restores the job
      state from it and calls the ``load_job_methods`` entry for the stored
      ``status_phase``.

    :return: 0 when the job had to be inserted; None otherwise
    """
    lines = self.status_handler.get_status_rows(self.file_sys_addr,
                                                'job',
                                                catch_error=False)
    # The -1 check must come first: get_status_rows returns the int -1 on
    # an ERROR answer when catch_error is False, and len(-1) raises
    # TypeError.
    if lines == -1 or len(lines) == 0:
        self.insert_job()
        return 0

    line = lines[0]
    # The job has already finished.
    if line['status_phase'] == mt.task_phases[-1]:
        result_url = line['result_url']
        self.send_result(result_url)
    # The job has not finished yet.
    else:
        # First check, with a ping, whether that job's tracker is still
        # running.
        tracker_addr = (line['tracker_ip_ping'], line['tracker_port_ping'])
        answer_addr = (line['answer_ip'], line['answer_port'])
        message = mt.Message().set_message('PING')
        tracker_alaive = mt.loop_tool(
            mt.try_to_send_message, mt.send_message, message, tracker_addr,
            lambda: print("JobTracker: ", "No me respondio el tracker"))

        if tracker_alaive is not None and isinstance(tracker_alaive, mt.Message) \
                and tracker_alaive.message_name == "OK":
            # The tracker is still running: tell the client the address
            # on which the answer will arrive.
            print('JobTracker: ', 'Mandamos hacer Relocate')
            message = mt.Message().set_message(
                'RELOCATE', {
                    'ping_tracker_ip': tracker_addr[0],
                    "ping_tracker_port": tracker_addr[1],
                    "answer_ip": answer_addr[0],
                    'answer_port': answer_addr[1]
                })
            answer = mt.loop_tool(
                mt.try_to_send_message, mt.send_message, message,
                self.client_addr,
                lambda: print("JobTracker: ", "No me respondio el tracker"))
            print("JobTracker: ", 'El cliente me respondio: ', answer)
            self.send_done_to_current_worker(self.current_worker_addr, 'job')
        else:
            # The job's tracker is not running, possibly because it
            # disconnected.
            print(
                "JobTracker: ",
                'El tracker de ese job:{} no esta corriendo'.format(
                    self.job_id))
            print('JobTracker: ', "Asumire la tarea")
            # Take over the job: first record this tracker's ping address
            # in the status row.
            # TODO: the key of the job table is an integer, self.job_id.
            new_ping_addr_changes = [
                ('tracker_ip_ping', self.tracker_addr_ping[0]),
                ('tracker_port_ping', self.tracker_addr_ping[1])
            ]
            self.status_handler.update_status_row(self.file_sys_addr, 'job',
                                                  ('job_id', self.job_id),
                                                  new_ping_addr_changes)
            # Restore the job state (map or reduce), the data type being
            # processed and the phase the job was in.
            self.job_phase = line['job_state']
            self.data_type = line['data_type']
            self.status_phase = line['status_phase']
            self.load_job_methods[self.status_phase]()
def append_block(self, message, new_addr):
    """Receive a block of pairs on ``new_addr`` and append them to files.

    A REP socket is bound on ``new_addr`` and one message carrying
    ``data_urls`` and ``pairs`` is received and acknowledged with ``OK``.
    The block is marked ``mt.slices_states[-2]`` (WRITING), each pair is
    appended to its file, and the block is marked ``mt.slices_states[-1]``
    (DONE). If an append fails the block is set back to
    ``mt.slices_states[1]`` (SUBMITTED).

    :param message: announcing message whose payload carries
        ``block_size``, ``status_db_url`` and ``block_id``
    :param new_addr: ``(ip, port)`` on which the data message is awaited
    :return: 0 on success, -1 on any failure
    """
    context = zmq.Context()
    append_socket = context.socket(zmq.REP)
    try:
        append_socket.bind('tcp://{}:{}'.format(*new_addr))
    except zmq.error.ZMQError:
        print("FS: ZMQ error!!!")
        # Release the socket; it used to be left open on this path.
        append_socket.close()
        return -1

    try:
        answer = mt.recv_message(append_socket.recv_string, 2)
    # A tuple is required to catch both types:
    # ``except TimeoutError or MemoryError`` only caught TimeoutError.
    except (TimeoutError, MemoryError):
        print("FS: Timeout error en el appendblock!!!")
        append_socket.close()
        return -1

    try:
        block_size = message.payload['block_size']
        status_db_url = message.payload['status_db_url']
        block_id = message.payload['block_id']
    except KeyError:
        print("FS: Parece que hubo algun error en la entrada")
        append_socket.close()
        return -1

    fields_values_done = [('state', mt.slices_states[-1])]
    fields_values_writing = [('state', mt.slices_states[-2])]
    key = ('block_id', block_id)
    table = 'block'

    # Acknowledge that the data message arrived.
    append_socket.send_string(str(mt.Message().set_message('OK')))
    append_socket.close()

    try:
        data_urls = answer.payload['data_urls']
        pairs = answer.payload['pairs']
    except KeyError:
        print("FS: Parece que hubo algun error en la entrada")
        return -1

    # Mark the block as WRITING.
    self._update_status_fields(fields_values_writing, key, status_db_url,
                               table)
    print("FS: Vamos a hacerle append al bloque: ", block_id)
    # Append each pair to its corresponding file.
    for i in range(block_size):
        try:
            self._append_data(data_urls[i], pairs[i])
        # Same fix as above: the former ``A or B or C`` expression only
        # caught pickle.PicklingError.
        except (pickle.PicklingError, EOFError, pickle.UnpicklingError):
            print(
                "FS: Parece que hubo algun problema en el appendata del bloque: ",
                block_id)
            # Put the block back to SUBMITTED.
            self._update_status_fields([("state", mt.slices_states[1])], key,
                                       status_db_url, table)
            return -1

    print("FS: Le hicimos append al bloque: ", block_id)
    # Mark the block as DONE.
    self._update_status_fields(fields_values_done, key, status_db_url, table)
    return 0