def send_status_command(self, file_sys_addr, first_message, second_message, get_result=False):
    """Run a two-step status exchange with the file system.

    ``first_message`` is sent to ``file_sys_addr``; the payload of its reply
    supplies the ``ip``/``port`` that ``second_message`` is then sent to.

    :param file_sys_addr: address contacted with the first message
    :param first_message: message that opens the exchange
    :param second_message: message sent to the address named in the first reply
    :param get_result: when true, hand back the second reply unchecked
    :return: -1 if ``_verify_if_errors_in_fs`` reports -1 for a reply, the
        second reply when ``get_result`` is true, otherwise 0
    """
    first_reply = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, first_message, file_sys_addr,
        lambda: print("File sys didn't answer"))
    if self._verify_if_errors_in_fs(first_reply) == -1:
        return -1

    # The first reply tells us where the second message has to go.
    data_addr = (first_reply.payload['ip'], first_reply.payload['port'])
    second_reply = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, second_message, data_addr,
        lambda: print("File sys didn't answered"))
    if get_result:
        return second_reply
    return -1 if self._verify_if_errors_in_fs(second_reply) == -1 else 0
def _check_workers_status(self, workers):
    """Send STATUS to every worker and collect the payloads of the OK replies.

    When no worker answers OK, an ERROR message is sent to ``self.client_addr``
    and ``send_done_to_current_worker`` is invoked.

    :param workers: iterable of worker addresses (indexable, ip first, port second)
    :return: dict mapping ``(ip, port)`` to the payload of that worker's OK reply
    """
    status_by_worker = {}
    for worker in workers:
        print('JobTracker: ', 'Tratemos de contactar al worker: ', worker)
        reply = mt.loop_tool(
            mt.try_to_send_message, mt.send_message,
            mt.Message().set_message("STATUS"), worker,
            lambda: print('JobTracker: ',
                          'No me respondio el worker {}:{}'.format(*worker)))
        if reply is None or reply.message_name != "OK":
            continue
        status_by_worker[(worker[0], worker[1])] = reply.payload
        print('JobTracker: ', 'Me respondio el worker {}:{}'.format(*worker))

    if not status_by_worker:
        # Nobody can run the job: tell the client, then release our own worker.
        no_workers_error = mt.Message().set_message(
            'ERROR', {'info': "Can execute job, there is not workers"})
        mt.loop_tool(
            mt.try_to_send_message, mt.send_message, no_workers_error,
            self.client_addr,
            lambda: print('JobTracker: ',
                          'No me respondio el cliente al mensaje de error'))
        print('JobTracker: ', 'No me respondio nadie, cerrando JobTracker')
        self.send_done_to_current_worker(self.current_worker_addr, 'job')

    print('JobTracker: ', 'este es el workerstatus: ', status_by_worker)
    return status_by_worker
def _get_lines(self, message, new_addr, len_limit=500, empty_chac='', send_message_func=mt.send_message_recv_str):
    """Pull chunks from ``new_addr`` until about ``len_limit`` items are gathered.

    Each request sends ``message`` and the reply is appended to the slice.
    Fetching stops when the slice has reached ``len_limit`` or a reply equal
    to ``empty_chac`` arrives.

    :param message: message sent for every chunk request
    :param new_addr: address the chunks are requested from
    :param len_limit: slice length at which fetching stops
    :param empty_chac: empty value of the data type; also the end marker
    :param send_message_func: send function handed to ``mt.loop_tool``
    :return: ``(slice, eof)``, or ``(-1, False)`` when
        ``_verify_if_errors_in_fs`` reports -1 for a reply
    """
    def request_chunk():
        return mt.loop_tool(mt.try_to_send_message, send_message_func, message,
                            new_addr, lambda: print("File SYS didn't respond"))

    collected = empty_chac
    chunk = request_chunk()
    status = self._verify_if_errors_in_fs(chunk)
    if status == -1:
        return status, False

    while len(collected) < len_limit and chunk != empty_chac:
        collected += chunk
        chunk = request_chunk()
        status = self._verify_if_errors_in_fs(chunk)
        if status == -1:
            return status, False

    # The chunk fetched just before the limit was hit has not been stored yet.
    if len(collected) >= len_limit:
        collected += chunk
    return collected, chunk == empty_chac
def new_put_data(self, current_file_url, file_url, is_byte):
    """Upload a local file to the file system through a PUTDATA exchange.

    A PUTDATA request is sent to ``self.filesystem_addr``; the OK reply names
    the ip/port of a data socket to which the file is streamed in the pieces
    returned by ``self._read_some_lines``.

    :param current_file_url: path of the local file to read
    :param file_url: destination url inside the file system
    :param is_byte: flag forwarded in the request and to ``_read_some_lines``
    :return: -1 when a reply times out, 1 when a reply is not OK or the local
        file does not exist, ``None`` when the whole file was sent
    """
    context = zmqa.Context()
    filesystem_socket = context.socket(zmq.REQ)
    try:
        filesystem_socket.connect("tcp://" + self.filesystem_addr[0] + ":" +
                                  self.filesystem_addr[1])
        message = Message().set_message("PUTDATA", {
            'file_url': file_url,
            'byte': is_byte
        })
        filesystem_socket.send_string(str(message))
        answer = mt.loop_tool(mt.try_to_recv, filesystem_socket.recv_string)
    finally:
        filesystem_socket.close()

    if answer == -1:
        print("Client: Putdata answer timed out!")
        return -1
    if answer.message_name != 'OK':
        print("Client: No me devolvieron OK en el put data")
        return 1

    data_addr = answer.payload["ip"] + ":" + answer.payload["port"]
    putdata_context = zmqa.Context()
    putdata_socket = putdata_context.socket(zmq.REQ)
    try:
        putdata_socket.connect("tcp://" + data_addr)
        with open(current_file_url, 'r') as file:
            lines, eof = self._read_some_lines(file, is_byte)
            while True:
                putdata_socket.send_string(lines)
                if eof:
                    break
                answer = mt.loop_tool(mt.try_to_recv,
                                      putdata_socket.recv_string)
                # The timeout sentinel must be tested first: -1 has no
                # ``message_name`` attribute.
                if answer == -1:
                    print("Client: Data Message timed out")
                    return -1
                if answer.message_name != "OK":
                    print(
                        "Client: Data socket answer timed out. Something wrong sending the functions file"
                    )
                    return 1
                lines, eof = self._read_some_lines(file, is_byte)
        print("Data sendend: ", current_file_url)
    except FileNotFoundError:
        print('No existe el file: ', current_file_url)
        return 1
    finally:
        # Release the data socket on every exit path, including early returns.
        putdata_socket.close()
def new_get_data(self, file_url, is_byte_data=True):
    """Download a file from the file system and unpickle its content.

    A GETDATA request is sent to ``self.filesystem_addr``; the OK reply names
    the ip/port of a data socket. Pieces are then requested one at a time by
    sending OK, until a piece equal to the end marker from
    ``Client._get_end_character`` is received.

    :param file_url: url of the file inside the file system
    :param is_byte_data: flag forwarded in the request and used to pick the
        end marker
    :return: the unpickled object, -1 when a reply times out, 1 when the file
        system does not answer OK
    """
    context = zmqa.Context()
    filesystem_socket = context.socket(zmq.REQ)
    data_socket = None
    try:
        print(
            "Client:filesystem addr:", "tcp://" + self.filesystem_addr[0] +
            ":" + self.filesystem_addr[1])
        filesystem_socket.connect("tcp://{}:{}".format(*self.filesystem_addr))
        filesystem_socket.send_string(
            str(Message().set_message("GETDATA", {
                "byte": is_byte_data,
                "file_url": file_url
            })))
        data_sock_message = mt.loop_tool(mt.try_to_recv,
                                         filesystem_socket.recv_string)
        if data_sock_message == -1:
            print("No me respondio el filesystem")
            return -1
        if data_sock_message.message_name != "OK":
            print(
                "Client:Some error happened getting the message with the ip and port to get the result of "
                "the operation")
            print("instead, data sock message was:", str(data_sock_message))
            return 1

        data_socket = context.socket(zmq.REQ)
        data_socket.connect("tcp://{}:{}".format(
            data_sock_message.payload["ip"],
            data_sock_message.payload["port"]))
        data_socket.send_string(str(Message().set_message("OK")))

        eof_cha = Client._get_end_character(is_byte_data)
        result_byte = eof_cha
        temp_line = mt.loop_tool(mt.raw_try_to_recv, data_socket.recv)
        if temp_line == -1:
            print("No me respondio el filesystem")
            return -1
        while temp_line != eof_cha:
            # Acknowledge the piece so that the next one is sent.
            data_socket.send_string(str(Message().set_message("OK")))
            result_byte += temp_line
            temp_line = mt.loop_tool(mt.raw_try_to_recv, data_socket.recv)
            if temp_line == -1:
                print("No me respondio el filesystem")
                return -1
    finally:
        # Both sockets are released on every exit path.
        filesystem_socket.close()
        if data_socket is not None:
            data_socket.close()

    import pickle
    # SECURITY NOTE(review): this unpickles bytes received over the network;
    # pickle can execute arbitrary code, so the file system must be trusted.
    result_data = pickle.loads(result_byte)
    return result_data
def save_key_file(self, file_sys_addr, data_url, key_value_pair):
    '''
    Ask the file system to append ``key_value_pair`` to the file ``data_url``.

    An APPENDDATA request is sent first. While ``_verify_if_errors_in_fs``
    reports 1 for the reply (handled here as "the file is locked"), the
    request is repeated every 0.5 seconds. The retry is a loop rather than a
    recursive call so that long lock contention cannot exhaust the stack.

    :param file_sys_addr: address of the file system node
    :param data_url: url of the file that receives the append
    :param key_value_pair: pair (key, [value]); the value is wrapped in a list
        so that the file system can concatenate with ``+=`` when appending
    :return: -1 if the file system does not answer the request, the result of
        ``_reset_method_if_no_answer_from_fs`` if the data transfer gets no
        answer, otherwise 0
    '''
    while True:
        message = mt.Message()
        message.set_message("APPENDDATA", {'file_url': data_url, 'byte': True})
        answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, message,
                              file_sys_addr,
                              lambda: print("No me contesta el File Sys"))
        if_none = self._verify_if_errors_in_fs(answer)
        if if_none == -1:
            return -1
        if if_none != 1:
            break
        print('Me dijeron que esta puesto un lock en :', data_url)
        print("Intentamos de nuevo hacer appenddata a: ", data_url)
        time.sleep(0.5)

    if answer.message_name == 'OK':
        # The OK reply carries the address that receives the actual data.
        new_addr = (answer.payload['ip'], answer.payload['port'])
        answer = mt.loop_tool(mt.try_send_byte_data, mt.send_byte_data,
                              key_value_pair, new_addr,
                              lambda: print(" file sys did't respond"))
        if_none = self._verify_if_errors_in_fs(answer)
        if if_none == -1:
            return self._reset_method_if_no_answer_from_fs(
                self.save_key_file, file_sys_addr, data_url, key_value_pair)
    return 0
def get_pyobj_data(self, file_sys_addr, data_file_name='map_data', data_url=None):
    """Fetch a byte-stored object from the file system with GETDATA.

    While ``_verify_if_errors_in_fs`` reports 1 for the reply, the request is
    repeated every 0.5 seconds. The retry is a loop rather than a recursive
    call so that a long wait cannot exhaust the stack.

    :param file_sys_addr: address of the file system node
    :param data_file_name: file name joined to ``self.job_url`` when
        ``data_url`` is None
    :param data_url: full url of the file; takes precedence when given
    :return: ``(data, is_byte)`` as produced by ``_get_complete_object``; the
        result of ``_reset_method_if_no_answer_from_fs`` when there is no
        answer; ``None`` when the reply is not OK
    """
    if data_url is None:
        file_url = "{}/{}".format(self.job_url, data_file_name)
    else:
        file_url = data_url

    while True:
        message = mt.Message()
        message.set_message("GETDATA", {'file_url': file_url, 'byte': True})
        answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, message,
                              file_sys_addr,
                              lambda: print("File system didn't respond"))
        if_none = self._verify_if_errors_in_fs(answer)
        if if_none == -1:
            return self._reset_method_if_no_answer_from_fs(
                self.get_pyobj_data, file_sys_addr, data_file_name, data_url)
        if if_none != 1:
            break
        time.sleep(0.5)
        print("Intentamos de nuevo get_pyobj_data")

    if answer.message_name != 'OK':
        return None

    data_and_is_byte = self._get_complete_object(answer, b'',
                                                 mt.send_message_recv_byte)
    if data_and_is_byte == -1:
        return self._reset_method_if_no_answer_from_fs(
            self.get_pyobj_data, file_sys_addr, data_file_name, data_url)
    data, is_byte = data_and_is_byte
    return data, is_byte
def try_to_connect_master(self):
    """Send the JOB message to a master, trying again until one answers OK.

    Every attempt picks a master with ``self._get_new_master()``, sends the
    job description and waits for the reply. The OK reply supplies the address
    stored in ``self.master_ping_addr``.

    :return: 0 once a master has answered OK
    """
    print("Client: ", 'Esperando respuesta por: ', self.listener_addr)
    while True:
        print("Client: Into the while trying to get some master")
        sock = zmqa.Context().socket(zmq.REQ)
        try:
            self._get_new_master()
            print("Client: ", "Trying to cannect to: ", self.current_master)
            sock.connect("tcp://{}:{}".format(*self.current_master))
            sock.send_string(
                str(Message().set_message(
                    "JOB", {
                        "job_url": self.job_url,
                        "job_id": self.jhash,
                        "data_type": self.data_type,
                        "client_addr": self.listener_addr,
                        "functions_url": self.files_urls["functions_url"],
                        "map_data_url": self.files_urls["data_url"],
                        "status_db_url": self.files_urls["db_url"]
                    })))
            print("Enviado el mensaje de job")
            answer = mt.loop_tool(mt.try_to_recv, sock.recv_string)
        finally:
            # One socket is opened per attempt; close it so that failed
            # attempts do not pile up open sockets.
            sock.close()

        if answer == -1:
            print("Client: ", self.current_master, " did not responded")
            # NOTE(review): _get_new_master() is called again at the top of
            # the loop, so a timeout may skip one master — confirm intent.
            self._get_new_master()
            continue
        print("Client:JOB answer:", answer)
        if answer.message_name == "OK":
            self.master_ping_addr = (answer.payload["ping_tracker_ip"],
                                     answer.payload["ping_tracker_port"])
            print("Client: ", "Sended job")
            return 0
def send_job(self, data_file, function_file):
    """
    Register a job with the current master, upload its files and submit it.

    :param data_file: The url of the data file to send
    :param function_file: The url of the function file to send
    :return: 0 if worked OK, -1 if the master does not answer the JOBREG
        request or answers something other than OK
    :raises AssertionError: if ``try_to_connect_master`` returns -1
    """
    print("Client: ", 'Tratamos de hacer jobreg a: ', self.current_master)
    client_context = zmqa.Context()
    register_socket = client_context.socket(zmq.REQ)
    try:
        register_socket.connect("tcp://" + self.current_master[0] + ":" +
                                self.current_master[1])
        register_message = Message().set_message(
            "JOBREG", {
                "files": [function_file, data_file],
                "extra_info": self.job_info_for_register
            })
        register_socket.send_string(str(register_message))
        answer = mt.loop_tool(mt.try_to_recv, register_socket.recv_string)
        if answer == -1:
            print("Client: ", 'No me respondio: ', self.current_master)
            return -1
        print("Client: ", "Recibi del jobreg: ", answer)
        if answer.message_name != "OK":
            print("Client:Job not correctly sent")
            return -1

        self.jhash = answer.payload["job_id"]
        self.masters_list = answer.payload["trackers_addr"]
        self.filesystem_addrs = answer.payload["filesystem"]
        self.filesystem_addr = self._get_new_filesystem_node()
        if "info" in answer.payload:
            print("Info:", answer.payload["info"])

        # First sends the files to the filesystem, switching node on failure.
        # NOTE(review): the result of _get_new_filesystem_node() is not
        # checked here — confirm it cannot signal "no nodes left".
        while True:
            r = self.send_job_data_to_fs(function_file, data_file)
            if r != -1:
                break
            print("Client: Problem with the FS")
            self.filesystem_addr = self._get_new_filesystem_node()
    finally:
        # Close the registration socket on every exit path.
        register_socket.close()

    # The call must not live inside an ``assert``: with ``python -O`` the
    # whole statement is removed and the job would never be submitted.
    if self.try_to_connect_master() == -1:
        raise AssertionError("No hay workers disponibles")
    return 0
def send_error_message_to_addr(self, payload, addr):
    """Send an ERROR message to ``addr`` and then finish the current worker.

    :param payload: payload attached to the ERROR message
    :param addr: address that receives the message
    """
    reply = mt.loop_tool(
        mt.try_to_send_message, mt.send_message,
        mt.Message().set_message('ERROR', payload), addr,
        lambda: print('JobTracker: ', 'No me respondio: ', addr))
    if reply is not None:
        print('JobTracker: ', 'Me respondio: ', addr)
    self.send_done_to_current_worker(self.current_worker_addr, 'job')
def save_block(self, file_sys_addr, data_urls, pairs, status_db_url, block_id):
    """Store a block of pairs through a two-step APPENDBLOCK exchange.

    The first APPENDBLOCK message announces the block (size, status database
    url and id); its reply carries the ip/port to which the second
    APPENDBLOCK message, holding ``data_urls`` and ``pairs``, is sent.

    :param file_sys_addr: address of the file system node
    :param data_urls: urls the pairs are stored under; their count is sent as
        ``block_size``
    :param pairs: data sent together with ``data_urls``
    :param status_db_url: url of the status database
    :param block_id: identifier of the block
    :return: 0 when the final reply is an OK message, otherwise the result of
        ``_reset_method_if_no_answer_from_fs``
    """
    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.save_block, file_sys_addr, data_urls, pairs, status_db_url,
            block_id)

    request = mt.Message()
    request.set_message(
        "APPENDBLOCK", {
            'block_size': len(data_urls),
            'status_db_url': status_db_url,
            'block_id': block_id
        })
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message, request,
                         file_sys_addr,
                         lambda: print("No me contesta el File Sys"))
    if self._verify_if_errors_in_fs(reply) == -1:
        return start_over()

    block_addr = (reply.payload['ip'], reply.payload['port'])
    request.set_message("APPENDBLOCK", {
        'data_urls': data_urls,
        'pairs': pairs
    })
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message, request,
                         block_addr,
                         lambda: print("No me contesta el File Sys"))
    if self._verify_if_errors_in_fs(reply) == -1:
        return start_over()

    stored = isinstance(reply, mt.Message) and reply.message_name == 'OK'
    return 0 if stored else start_over()
def get_line_by_line_str(self, file_sys_addr, job_phase, data_file_name='map_data', data_url=None, len_limit=500):
    """Read a text file from the file system in slices and store every slice.

    After a GETDATA request, slices are fetched with ``_get_lines`` and each
    one is stored with ``_save_split_get_url``, numbered from 0 upwards.

    :param file_sys_addr: address of the file system node
    :param job_phase: phase value forwarded to ``_save_split_get_url``
    :param data_file_name: file name joined to ``self.job_url`` when
        ``data_url`` is None
    :param data_url: full url of the file; takes precedence when given
    :param len_limit: slice length limit forwarded to ``_get_lines``
    :return: ``(slices_url, False)``; the result of
        ``_reset_method_if_no_answer_from_fs`` when there is no answer;
        ``None`` when the GETDATA reply is not OK
    """
    def start_over():
        return self._reset_method_if_no_answer_from_fs(
            self.get_line_by_line_str, file_sys_addr, job_phase,
            data_file_name, data_url, len_limit)

    file_url = ("{}/{}".format(self.job_url, data_file_name)
                if data_url is None else data_url)
    request = mt.Message()
    request.set_message("GETDATA", {'file_url': file_url, 'byte': False})
    reply = mt.loop_tool(mt.try_to_send_message, mt.send_message, request,
                         file_sys_addr,
                         lambda: print("File system didn't respond"))
    if self._verify_if_errors_in_fs(reply) == -1:
        return start_over()
    if reply.message_name != 'OK':
        return None

    lines_addr = (reply.payload['ip'], reply.payload['port'])
    ok_message = mt.Message()
    ok_message.set_message("OK")

    stored_urls = []
    index = 0
    while True:
        slice_, eof = self._get_lines(ok_message, lines_addr, len_limit)
        if slice_ == -1:
            return start_over()
        stored_urls.append(
            self._save_split_get_url(file_sys_addr, index, job_phase, slice_))
        if eof:
            break
        index += 1
    return stored_urls, False
def save_split_byte_data(self, file_sys_addr, data_url, data):
    """Store byte data in the file system through a PUTDATA exchange.

    While ``_verify_if_errors_in_fs`` reports 1 for the reply, the request is
    repeated every 0.5 seconds. The retry is a loop rather than a recursive
    call so that a long wait cannot exhaust the stack.

    :param file_sys_addr: address of the file system node
    :param data_url: url the data is stored under
    :param data: byte data to store
    :return: 0, or the result of ``_reset_method_if_no_answer_from_fs`` when
        the file system does not answer
    """
    while True:
        message = mt.Message()
        message.set_message("PUTDATA", {
            'file_url': data_url,
            'byte': True,
        })
        answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, message,
                              file_sys_addr,
                              lambda: print("No me contesta el File Sys"))
        if_none = self._verify_if_errors_in_fs(answer)
        if if_none == -1:
            return self._reset_method_if_no_answer_from_fs(
                self.save_split_byte_data, file_sys_addr, data_url, data)
        if if_none != 1:
            break
        time.sleep(0.5)
        print("Intentamos de nuevo save_split_byte_data")

    if answer.message_name == 'OK':
        # The OK reply carries the address that receives the actual data.
        new_addr = (answer.payload['ip'], answer.payload['port'])
        answer = mt.loop_tool(mt.try_send_byte_data, mt.send_byte_data, data,
                              new_addr,
                              lambda: print("file sys did't respond"))
        if_none = self._verify_if_errors_in_fs(answer)
        if if_none == -1:
            return self._reset_method_if_no_answer_from_fs(
                self.save_split_byte_data, file_sys_addr, data_url, data)
        # An empty payload follows the data; its reply is not inspected.
        # NOTE(review): this pairs mt.try_to_send_message with
        # mt.send_byte_data, unlike the call above — confirm it is intended.
        mt.loop_tool(mt.try_to_send_message, mt.send_byte_data, b'', new_addr,
                     lambda: print("file sys did't respond"))
    return 0
def send_done_to_current_worker(self, current_worker_addr, role):
    """Tell our own worker that the task is finished, then end this process.

    :param current_worker_addr: address of the worker that owns this process
    :param role: value sent under the ``role`` key of the DONE message
    :raises SystemExit: always, once the DONE message has been attempted
    """
    # ``exit`` is injected by the ``site`` module for interactive use and is
    # missing under ``python -S`` or in frozen programs; sys.exit is the
    # supported way to end a program.
    import sys

    print('Task_Exc: ', 'Mandamos DONE al propio worker')
    message = mt.Message().set_message('DONE', {'role': role})
    answer = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, message, current_worker_addr,
        lambda: print('Task_Exc: ', 'No me respondio mi propio worker'))
    if answer is not None:
        print('Task_Exc: ', 'Termine')
    sys.exit()
def new_put_job(self):
    """Create the job on the file system with a two-step PUTJOB exchange.

    The PUTJOB request goes to ``self.filesystem_addr``; its OK reply names
    the ip/port of a second socket, which is sent OK and answers with the
    urls of the new job.

    :return: ``(job_url, database_url)`` on success, -1 on a timeout, a
        non-OK reply or a final reply that lacks either url
    """
    def give_up(sock, text):
        print(text)
        sock.close()
        return -1

    context = zmqa.Context()
    filesystem_socket = context.socket(zmq.REQ)
    print("Client: ", 'Tratemos de hacer putjob a: ', self.filesystem_addr)
    filesystem_socket.connect("tcp://{}:{}".format(*self.filesystem_addr))
    request = Message().set_message("PUTJOB", {"job_id": self.jhash})
    filesystem_socket.send_string(str(request))
    reply = mt.loop_tool(mt.try_to_recv, filesystem_socket.recv_string)
    if reply == -1:
        return give_up(filesystem_socket, "PUTJOB answer timed out!")
    if reply.message_name != 'OK':
        return give_up(filesystem_socket, "No me devolvieron OK")
    filesystem_socket.close()

    # Second step: talk to the address announced in the first reply.
    data_addr = reply.payload["ip"] + ":" + reply.payload["port"]
    putjob_context = zmqa.Context()
    putjob_socket = putjob_context.socket(zmq.REQ)
    putjob_socket.connect("tcp://" + data_addr)
    putjob_socket.send_string(str(Message().set_message("OK")))
    reply = mt.loop_tool(mt.try_to_recv, putjob_socket.recv_string)
    if reply == -1:
        return give_up(putjob_socket, "PUTJOB answer timed out!")
    if reply.message_name != "OK":
        return give_up(putjob_socket, "Error")

    has_urls = 'job_url' in reply.payload and 'database_url' in reply.payload
    outcome = ((reply.payload['job_url'], reply.payload['database_url'])
               if has_urls else -1)
    putjob_socket.close()
    return outcome
def _ping_master(self):
    """Send a PING to ``self.master_ping_addr`` and report whether it answered.

    The message is delivered through ``mt.send_message``. This method does
    not open a socket of its own: an earlier version created and connected
    one on every call without ever using or closing it.

    :return: True if ``mt.loop_tool`` returned an answer, False if it
        returned None
    """
    message = Message().set_message("PING", {"info": "Are you there?"})
    answer = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, message,
        self.master_ping_addr,
        lambda: print("Client: ", "No me respondio el master"), 2)
    return answer is not None
def save_split_str_data(self, file_sys_addr, slice_url, data):
    """Store string data in the file system through a PUTDATA exchange.

    While ``_verify_if_errors_in_fs`` reports 1 for the reply, the request is
    repeated every 0.5 seconds. The retry is a loop rather than a recursive
    call so that a long wait cannot exhaust the stack.

    :param file_sys_addr: address of the file system node
    :param slice_url: url the slice is stored under
    :param data: string data to store
    :return: ``None`` after the data was sent, -1 when the PUTDATA reply is
        not OK, or the result of ``_reset_method_if_no_answer_from_fs`` when
        the file system does not answer
    """
    while True:
        message = mt.Message()
        message.set_message("PUTDATA", {'file_url': slice_url, 'byte': False})
        answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, message,
                              file_sys_addr,
                              lambda: print("No me contesta el File Sys"))
        if_none = self._verify_if_errors_in_fs(answer)
        if if_none == -1:
            return self._reset_method_if_no_answer_from_fs(
                self.save_split_str_data, file_sys_addr, slice_url, data)
        if if_none != 1:
            break
        time.sleep(0.5)
        print("Intentamos de nuevo save_split_str_data")

    if answer.message_name != 'OK':
        return -1

    # The OK reply carries the address that receives the actual data.
    new_addr = (answer.payload['ip'], answer.payload['port'])
    answer = mt.loop_tool(mt.try_to_send_message, mt.send_message, data,
                          new_addr, lambda: print("file sys did't respond"))
    if_none = self._verify_if_errors_in_fs(answer)
    if if_none == -1:
        return self._reset_method_if_no_answer_from_fs(
            self.save_split_str_data, file_sys_addr, slice_url, data)
    # An empty string follows the data; its reply is not inspected.
    mt.loop_tool(mt.try_to_send_message, mt.send_message, '', new_addr,
                 lambda: print("file sys did't respond"))
def send_done_to_current_worker(self, current_worker_addr, role):
    """Tell our own worker that the job is finished, then end this process.

    :param current_worker_addr: address of the worker that owns this process
    :param role: value sent under the ``role`` key of the DONE message
    :raises SystemExit: always, after the pinging process has been joined
    """
    # ``exit`` is injected by the ``site`` module for interactive use and is
    # missing under ``python -S`` or in frozen programs; sys.exit is the
    # supported way to end a program.
    import sys

    print('JobTracker: ', 'Mandamos DONE al propio worker')
    message = mt.Message().set_message('DONE', {'role': role})
    answer = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, message, current_worker_addr,
        lambda: print('JobTracker: ', 'No me respondio mi propio worker'))
    if answer is not None:
        print('JobTracker: ', 'Termine')
    print("JobTracker: ", "Le hice terminate al pinging process")
    # NOTE(review): the log line says "terminate" but only join() is called;
    # join() waits for the pinging process to end by itself — confirm.
    self.pinging_process.join()
    sys.exit()
def _remove_job(self):
    """Ask the file system to remove the job stored at ``self.job_url``.

    :return: -1 if the file system does not answer, otherwise 0 (also when
        the file system reports an error, which is only printed)
    """
    sock = zmqa.Context().socket(zmq.REQ)
    try:
        sock.connect("tcp://{}:{}".format(*self.filesystem_addr))
        sock.send_string(
            str(Message().set_message("REMOVEJOB",
                                      {'job_url': self.job_url})))
        answer = mt.loop_tool(mt.try_to_recv, sock.recv_string)
    finally:
        # The socket is single-use; close it on every exit path.
        sock.close()

    if answer == -1:
        print("Client: No me respondio el FileSystem")
        return -1
    if answer.message_name == "OK":
        print("Client: Job removed")
    else:
        print("Client: error al remover el job:", answer.payload['info'])
    return 0
def send_result(self, result_url):
    """Send the url of the final result to the client, then finish the worker.

    :param result_url: url of the result, sent under the ``result_url`` key
        of a DONE message
    :return: 0
    """
    print("JobTracker: ", "Intentemos mandarle el resultado_url al cliente")
    done_message = mt.Message().set_message("DONE", {'result_url': result_url})
    client_reply = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, done_message,
        self.client_addr,
        lambda: print('JobTracker: ', "Can't send results to client",
                      self.client_addr))
    acknowledged = (client_reply is not None
                    and client_reply.message_name == 'OK')
    if acknowledged:
        print('JobTracker: ', "EL cliente recibio la url del resultado final")
    self.send_done_to_current_worker(self.current_worker_addr, 'job')
    return 0
def send_finished_task(self):
    """Report the finished block to the tracker, then finish the worker.

    A DONE message carrying this worker's address and ``self.block_id`` is
    sent to ``self.answer_addr``.
    """
    done_payload = {
        'worker_addr': self.current_worker_addr,
        'block_id': self.block_id
    }
    print('Task_Exc: ', 'Voy a mandarle Done al tracker por: ',
          self.answer_addr)
    tracker_reply = mt.loop_tool(
        mt.try_to_send_message, mt.send_message,
        mt.Message().set_message("DONE", done_payload), self.answer_addr,
        lambda: print('Task_Exc: ', 'No pude mandarle Done al tracker'))
    print('Task_Exc: ', 'Esta fue la respuesta del JobTracker ', tracker_reply)
    if tracker_reply is None:
        print('Task_Exc: ', "No me respondieron")
    self.send_done_to_current_worker(self.current_worker_addr, 'task')
def _send_single_task_message(self, block_urls, put_byte, worker, block_id):
    """Send one TASK message describing a block to a worker.

    :param block_urls: urls of the block's slices, sent as ``block_urls``
    :param put_byte: value sent as ``load_byte``
    :param worker: address of the worker that receives the task
    :param block_id: identifier of the block
    :return: ``(answer, block_urls)`` where ``answer`` is whatever
        ``mt.loop_tool`` returned for the send
    """
    task_message = mt.Message()
    task_message.set_message("TASK", {
        'job_url': self.job_url,
        'job_id': self.job_id,
        'function_url': self.functions_url,
        'block_urls': block_urls,
        'task': self.job_phase,
        'answer_addr': self.tracker_addr,
        'load_byte': put_byte,
        'status_db_url': self.status_db_url,
        'block_id': block_id
    })
    worker_reply = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, task_message, worker,
        lambda: print('JobTracker: ', "No me respondio el worker ", worker))
    return worker_reply, block_urls
def wait_for_results(self, assign_tasks, workers_addrs, put_byte):
    """Wait for DONE messages from the workers and reassign stalled blocks.

    A REP socket is bound on ``self.tracker_addr``. Whenever no message
    arrives, the status database is queried for blocks of the current phase:
    with no submitted and no writing blocks left the method returns;
    otherwise every worker holding a submitted block is pinged, and the
    blocks whose worker does not answer are sent out again through
    ``send_tasks_messages``.

    :param assign_tasks: mapping updated in place with the new assignments
    :param workers_addrs: worker addresses, used only for logging here
    :param put_byte: flag forwarded to ``send_tasks_messages``
    :return: 0 once no block is left to wait for
    """
    context = zmqa.Context()
    answer_socket = context.socket(zmq.REP)
    try:
        try:
            answer_socket.bind('tcp://{}:{}'.format(*self.tracker_addr))
        except zmq.error.ZMQError:
            print("JobTracker: ZMQERROR in wait_for_results")
            # NOTE(review): the fallback port is not written back to
            # self.tracker_addr, which is the address logged below.
            answer_socket.bind('tcp://{}:{}'.format(
                self.tracker_ip,
                mt.get_available_random_port(self.tracker_ip)))
        print('JobTracker: ',
              "Ahora el tracker espera por respuestas DONE por: ",
              self.tracker_addr)
        print("JobTracker: ", 'Esperando Done de: ', workers_addrs)

        while True:
            ans = mt.loop_tool(self._try_to_recv_done, answer_socket)
            if ans is None:
                # Nothing arrived: look at the status database instead.
                submitted_filters = [('state', mt.slices_states[1]),
                                     ('phase', self.job_phase)]
                writing_filters = [('state', mt.slices_states[-2]),
                                   ('phase', self.job_phase)]
                print(
                    "JobTracker: Buscamos los submitted y los writing blocks")
                submitted_blocks_rows = self.status_handler.get_status_rows(
                    self.file_sys_addr, 'block', submitted_filters)
                writing_blocks_rows = self.status_handler.get_status_rows(
                    self.file_sys_addr, 'block', writing_filters)
                print("JobTracker: ",
                      "Ya recibimos los bloques de submitted y writing")
                if len(submitted_blocks_rows) == 0:
                    if len(writing_blocks_rows) == 0:
                        print("JobTracker: ",
                              'Ya todos los bloques estan en DONE')
                        return 0
                    continue

                testing_blocks = self._get_blocks_urls_with_blocks_rows(
                    submitted_blocks_rows)
                if testing_blocks == 0:
                    return 0

                # Keep only the blocks whose worker fails to answer a PING.
                unanswered_blocks = []
                for i, block in enumerate(testing_blocks):
                    block_id = submitted_blocks_rows[i]['block_id']
                    worker_assigned_ping = (
                        submitted_blocks_rows[i]['worker_ip'],
                        submitted_blocks_rows[i]['worker_port'])
                    # TODO: find out why a KeyError shows up here even when
                    # no worker has gone down.
                    print("JobTracker: ",
                          "ESTE ES EL WORKER ASSIGN DE BLOCK_ID ", block_id,
                          " worker: ", worker_assigned_ping)
                    print("JobTracker: ", 'Vamos a hacerle ping a :',
                          worker_assigned_ping, ' con addr ping: ',
                          worker_assigned_ping)
                    answer = mt.loop_tool(
                        mt.try_to_send_message, mt.send_message,
                        mt.Message().set_message("PING"),
                        worker_assigned_ping,
                        lambda: print("JobTracker: ", 'No me respondio: ',
                                      worker_assigned_ping), 1)
                    if answer is not None:
                        print(
                            "JobTracker: Ya se que este worker sigue pinchando: ",
                            worker_assigned_ping)
                    else:
                        print("JobTracker: Parece que murio este worker: ",
                              worker_assigned_ping)
                        print("JobTracker: ", 'buscamos otro worker')
                        unanswered_blocks.append(block)
                print("JobTracker: ",
                      'Ya le hicimo ping a todos, pacemos a reasignar')
                testing_blocks = unanswered_blocks
                print(
                    "JobTracker: ",
                    'ESTOS SON LOS BLOQUES QUE QUEDAN(MOD) En ESTADO SUBMITTED: ',
                    testing_blocks, ' QUE NO ME RESPONDEN')
                if len(testing_blocks) == 0:
                    continue

                workers_addrs, workers_status = self._getting_workers()
                assign_tasks2 = self.send_tasks_messages(
                    testing_blocks, put_byte, workers_status,
                    registered_block=True)
                assign_tasks.update(assign_tasks2)
                continue

            answer = mt.Message().get_message(ans)
            if answer.message_name == 'ERROR':
                # NOTE(review): nothing is sent back on this branch, yet a
                # REP socket must reply before it can receive again —
                # confirm how _try_to_recv_done copes with that.
                print("JobTracker: ", "Error por parte del worker: ",
                      answer.payload['info'])
            elif answer.message_name == "DONE":
                worker_addr = answer.payload['worker_addr']
                worker_addr = (worker_addr[0], worker_addr[1])
                print('JobTracker: ',
                      "Recibi un Done de: {}:{}".format(*worker_addr))
                message = mt.Message().set_message("OK")
                print("JobTracker: ", "Sending OK to: ", worker_addr)
                answer_socket.send_string(str(message))
    finally:
        # Every exit path releases the bound socket.
        answer_socket.close()
def execute(self):
    """Run the job and return its result.

    The job is started with ``_try_execute_task``; a REP socket bound on
    ``self.listener_addr`` then waits for the tracker. Every message is
    answered with OK. DONE ends the wait, ERROR aborts, RELOCATE moves the
    listener to the address given in the payload, and anything else is
    printed as information. When no message arrives the master is pinged and,
    if it is gone, another master is contacted. Finally the result is
    downloaded, switching file system node after a failure, for at most
    ``self.tries`` attempts.

    :return: the result data, or -1 if the job reported an error or the
        result could not be retrieved
    :raises AssertionError: if ``_try_execute_task`` returns -1
    """
    context = zmqa.Context()
    listener = context.socket(zmq.REP)
    try:
        print(
            "Client:Binded to the listener port:",
            "tcp://" + self.listener_addr[0] + ":" + self.listener_addr[1])
        result = self._try_execute_task()
        listener.bind("tcp://" + self.listener_addr[0] + ":" +
                      self.listener_addr[1])
        # Raised explicitly so that the check survives ``python -O``.
        if result == -1:
            raise AssertionError("No se pudo ejecutar el el job")

        p = Process(target=self.show_progress_job)
        p.daemon = True
        p.start()
        print("Client: Cargando....")

        # Loop so that information messages can arrive before the result.
        while True:
            response = mt.loop_tool(mt.try_to_recv, listener.recv_string, 5)
            if response == -1:
                if not self._ping_master():
                    print("Client: Se cayo el master anterior,busquemos otro")
                    self.try_to_connect_master()
                continue

            # Reply first and inspect afterwards: the reply is always OK.
            listener.send_string(str(Message().set_message("OK")))
            if response.message_name == "ERROR":
                print("Client:Error occurred during the operation: ",
                      response.payload["info"])
                return -1
            if response.message_name == "DONE":
                print("Listo el resultado")
                break
            if response.message_name == "RELOCATE":
                print("Client: ", "Me Mandaron a hacer Relocate")
                self.master_ping_addr = (response.payload["ping_tracker_ip"],
                                         response.payload["ping_tracker_port"])
                self.listener_addr = (response.payload["answer_ip"],
                                      response.payload["answer_port"])
                listener.close()
                listener = context.socket(zmq.REP)
                listener.bind("tcp://" + self.listener_addr[0] + ":" +
                              self.listener_addr[1])
                continue
            print("Client:Operation info:", response.payload["info"])
    finally:
        # Covers the ERROR return and any exception as well as normal exit.
        listener.close()

    result_url = response.payload["result_url"]
    print("Client:The result is in:", result_url, ", inside the filesystem")

    # Try to collect the result, at most self.tries times.
    current_tries = 0
    result_data = None
    while current_tries < self.tries:
        result_data = self.new_get_data(result_url)
        if result_data == -1:
            # Count the failed attempt; without this the limit never applies.
            current_tries += 1
            print("Hubo bateo al buscar los resultados")
            self.filesystem_addr = self._get_new_filesystem_node()
            if self.filesystem_addr == -1:
                print("Client: No pudimos resolver los resultados")
                return -1
        else:
            break
    return result_data
def new_put_job(self, new_addr, job_id, db_url):
    """Serve the second step of PUTJOB: create the job files and the status DB.

    A REP socket is bound on ``new_addr`` and waits for an OK message. The
    job's ``__init__.py`` and the database file are then created (exclusive
    mode), the status tables are created, and an OK message carrying the job
    url and the database url is sent back.

    :param new_addr: ``(ip, port)`` the data socket is bound to
    :param job_id: identifier of the job; names its directory
    :param db_url: path of the status database file
    :return: 0 when the reply was sent, -1 if the bind fails, the peer does
        not answer, or the peer sends something other than OK
    """
    schema = (
        "CREATE TABLE block (block_id text PRIMARY KEY NOT NULL, state text, "
        "phase text,worker_ip text,worker_port text)",
        "CREATE TABLE slices_url (slice_url text PRIMARY KEY NOT NULL , "
        "block_id text, FOREIGN KEY (block_id) REFERENCES block (block_id)) ",
        "CREATE TABLE result_url (result_url text PRIMARY KEY NOT NULL )",
        "CREATE TABLE block_result (block_id text NOT NULL, "
        "result_url text NOT NULL, PRIMARY KEY (block_id,result_url) , "
        "FOREIGN KEY (block_id) REFERENCES block (block_id) "
        "ON DELETE CASCADE ON UPDATE NO ACTION, "
        "FOREIGN KEY (result_url) REFERENCES result_url (result_url) "
        "ON DELETE CASCADE ON UPDATE NO ACTION )",
        "CREATE TABLE job (job_id text PRIMARY KEY NOT NULL, "
        "tracker_ip_ping text, tracker_port_ping text, answer_ip text, "
        "answer_port text, status_phase text, map_data_url text, "
        "result_url text, job_state text, data_type text)",
    )

    print("Estamos dentro del putjob")
    data_context = zmqa.Context()
    data_socket = data_context.socket(zmq.REP)
    try:
        try:
            data_socket.bind('tcp://{}:{}'.format(*new_addr))
        except zmq.error.ZMQError:
            print("FS ZMQERROR")
            return -1

        answer = mt.loop_tool(mt.try_to_recv, data_socket.recv_string, 1)
        if answer == -1:
            print("No me respondieron en el put job")
            return -1
        if answer.message_name != "OK":
            print("Hubo algun error en el putjob")
            return -1

        try:
            # Mode "x" fails when the file exists, so a repeated PUTJOB
            # leaves the existing files and database untouched.
            with open("./" + str(job_id) + "/__init__.py", "x"):
                pass
            with open(db_url, "x"):
                pass
            # Now configure the database.
            connection = sql.connect(db_url)
            try:
                cursor = connection.cursor()
                for statement in schema:
                    cursor.execute(statement)
                connection.commit()
                cursor.close()
            finally:
                connection.close()
        except FileExistsError:
            print("FS:Los archivos de inicialización ya existen... no sé cómo")

        # str(job_id) matches the path built above, also for non-str ids.
        data_socket.send_string(
            str(Message().set_message("OK", {
                "job_url": "./" + str(job_id),
                "database_url": db_url
            })))
        print("Se hizo putjob")
        return 0
    finally:
        # Also covers the bind-failure path.
        data_socket.close()
def _check_if_exists_job(self):
    """Decide what to do with the job depending on its row in the status DB.

    * No row (or -1 from ``get_status_rows``): register the job with
      ``insert_job`` and return 0.
    * Row in the last phase of ``mt.task_phases``: send the stored result url
      to the client.
    * Otherwise ping the tracker recorded in the row. If it answers OK, send
      the client a RELOCATE message with that tracker's addresses and finish
      this worker. If not, take the job over: record our ping address in the
      job row, restore the job's state and resume through
      ``self.load_job_methods``.

    :return: 0 when the job was newly inserted, otherwise ``None``
    """
    lines = self.status_handler.get_status_rows(self.file_sys_addr,
                                                'job',
                                                catch_error=False)
    # The -1 sentinel has to be tested before len(): len(-1) raises TypeError.
    if lines == -1 or len(lines) == 0:
        self.insert_job()
        return 0

    line = lines[0]
    # The job has already finished.
    if line['status_phase'] == mt.task_phases[-1]:
        result_url = line['result_url']
        self.send_result(result_url)
        return None

    # The job has not finished: check whether its tracker is still running.
    tracker_addr = (line['tracker_ip_ping'], line['tracker_port_ping'])
    answer_addr = (line['answer_ip'], line['answer_port'])
    message = mt.Message().set_message('PING')
    tracker_alaive = mt.loop_tool(
        mt.try_to_send_message, mt.send_message, message, tracker_addr,
        lambda: print("JobTracker: ", "No me respondio el tracker"))

    if (isinstance(tracker_alaive, mt.Message)
            and tracker_alaive.message_name == "OK"):
        # The tracker is alive: tell the client where its answer will come
        # from.
        print('JobTracker: ', 'Mandamos hacer Relocate')
        message = mt.Message().set_message(
            'RELOCATE', {
                'ping_tracker_ip': tracker_addr[0],
                "ping_tracker_port": tracker_addr[1],
                "answer_ip": answer_addr[0],
                'answer_port': answer_addr[1]
            })
        answer = mt.loop_tool(
            mt.try_to_send_message, mt.send_message, message,
            self.client_addr,
            lambda: print("JobTracker: ", "No me respondio el tracker"))
        print("JobTracker: ", 'El cliente me respondio: ', answer)
        self.send_done_to_current_worker(self.current_worker_addr, 'job')
        return None

    # The job's tracker is not running, possibly because it disconnected.
    print("JobTracker: ",
          'El tracker de ese job:{} no esta corriendo'.format(self.job_id))
    print('JobTracker: ', "Asumire la tarea")
    # Taking over starts by recording our own ping address in the job row.
    # TODO: the key of the job table is an integer, self.job_id.
    new_ping_addr_changes = [('tracker_ip_ping', self.tracker_addr_ping[0]),
                             ('tracker_port_ping', self.tracker_addr_ping[1])]
    self.status_handler.update_status_row(self.file_sys_addr, 'job',
                                          ('job_id', self.job_id),
                                          new_ping_addr_changes)
    # Restore the job state (map or reduce) and the data type under analysis.
    self.job_phase = line['job_state']
    self.data_type = line['data_type']
    self.status_phase = line['status_phase']
    self.load_job_methods[self.status_phase]()