def start(self):
    """Run the orderby operator's event loop.

    Opens a TCP server socket on ``self.my_port`` (the port reported by the
    bound socket is stored back into ``self.my_port``), opens a UNIX-domain
    server socket at ``self.local_addr``, registers with the master and then
    dispatches accept/read events until ``self.is_running`` becomes false.

    A ``KeyboardInterrupt`` is reported through ``self.report_error`` and the
    process exits with status 1.
    """
    try:
        # TCP listener for connections from other nodes.
        listener = self.iom.create_server_socket(
            AF_INET, SOCK_STREAM, 100, ("", self.my_port))
        _host, self.my_port = listener.ss.getsockname()
        ParaLiteLog.debug("listen on port : %s ..." % str(self.my_port))

        # UNIX-domain listener for connections from the same host; a stale
        # socket file from an earlier run is removed first.
        self.local_addr = "/tmp/paralite-local-addr-orderby-%s-%s-%s" % (
            gethostname(), self.cqid, self.opid)
        if os.path.exists(self.local_addr):
            os.remove(self.local_addr)
        self.iom.create_server_socket(
            AF_UNIX, SOCK_STREAM, 10, self.local_addr)

        # Tell the master which port this operator listens on.
        self.register_to_master(
            self.cqid, self.opid, gethostname(), self.my_port)
        ParaLiteLog.debug("reg to master: FINISH")

        while self.is_running:
            ev = self.next_event(None)
            if isinstance(ev, ioman_base.event_accept):
                self.handle_accept(ev)
            # Empty reads are ignored.
            if isinstance(ev, ioman_base.event_read) and ev.data != "":
                self.handle_read(ev)

        ParaLiteLog.info("--orderby node %s on %s is finished--" % (
            self.opid, gethostname()))
    except KeyboardInterrupt:
        self.report_error(
            "ParaLite receives a interrupt signal and then will close the process\n")
        ParaLiteLog.info("--orderby node %s on %s is finished--" % (
            self.opid, gethostname()))
        sys.exit(1)
def sql_proc(self):
    """Run the SQL operator's event loop.

    Opens a TCP server socket on ``self.my_port`` (the port reported by the
    bound socket is stored back into ``self.my_port``), registers with the
    master and dispatches accept/read events until ``self.is_running``
    becomes false.  Afterwards every entry of ``self.threads`` and
    ``self.processes`` is joined.

    A ``KeyboardInterrupt`` is reported through ``self.report_error`` and the
    process exits with status 1.
    """
    try:
        ParaLiteLog.debug("sql proc : START")

        # TCP listener for all incoming connections.
        ch = self.iom.create_server_socket(
            AF_INET, SOCK_STREAM, 100, ("", self.my_port))
        _host, self.my_port = ch.ss.getsockname()
        ParaLiteLog.debug("listen on port : %s ..." % str(self.my_port))

        # Tell the master which port this operator listens on.
        self.register_to_master(
            self.cqid, self.opid, gethostname(), self.my_port)
        ParaLiteLog.debug("reg to master: FINISH")

        while self.is_running:
            ev = self.next_event(None)
            if isinstance(ev, ioman_base.event_accept):
                self.handle_accept(ev)
            # Empty reads are ignored.
            if isinstance(ev, ioman_base.event_read) and ev.data != "":
                self.handle_read(ev)

        # Wait for background workers before reporting completion.
        for thd in self.threads:
            thd.join()
        for proc in self.processes:
            proc.join()
        ParaLiteLog.info("--sql node %s on %s is finished--" % (
            self.opid, gethostname()))
    except KeyboardInterrupt:
        self.report_error(
            "ParaLite receives a interrupt signal and then will close the process\n")
        ParaLiteLog.info("--sql node %s on %s is finished--" % (
            self.opid, gethostname()))
        sys.exit(1)
def send_to_node(self, db, table, data, addr, row_sep, col_sep, is_replace):
    """Send one chunk of table data to the node at ``addr``.

    Two messages go out through ``self.really_send``:

    1. an INFO header made of ``conf.INFO``, ``db``, ``table``,
       ``self.db_col_sep``, ``col_sep``, ``row_sep`` and ``is_replace``,
       joined with ``conf.SEP_IN_MSG``;
    2. the payload, prefixed by a 10-character field holding ``len(db)``
       and then ``db`` itself, so the receiver can tell which database the
       data is for.
    """
    sep = conf.SEP_IN_MSG
    header_fields = (conf.INFO, db, table, self.db_col_sep,
                     col_sep, row_sep, is_replace)
    req_info = sep.join("%s" % field for field in header_fields)
    ParaLiteLog.info("sending %s --> %s" % (req_info, addr[0]))
    self.really_send(addr, req_info)

    # The first 10 characters carry the length of the database name.
    payload = "%10s%s%s" % (len(db), db, data)
    self.really_send(addr, payload)
    ParaLiteLog.info("sending data : %s --> %s" % (len(data), repr(addr)))
def distribute_data(self):
    """Ship the accumulated orderby result to its destination.

    All non-empty buffers in ``self.result`` are concatenated (one ``"\\n"``
    after each), optionally truncated to the first ``self.limit`` records
    (``-1`` means no limit), and then:

    * ``conf.DATA_TO_ONE_CLIENT``: the data is sent, with a 10-character
      length prefix, to a randomly chosen address from ``self.client_sock``
      and an ``"OK"`` reply is expected;
    * ``conf.DATA_TO_DB``: the buffer is kept in ``self.data`` and a load
      request describing it is sent to the master.

    Raises:
        AssertionError: if the client does not answer ``"OK"``.
    """
    whole_data = cStringIO.StringIO()
    for i in self.result:
        for csio in self.result[i]:
            d = csio.getvalue().strip()
            if not d:
                continue
            whole_data.write(d)
            whole_data.write("\n")

    if self.limit != -1:
        # NOTE(review): buffers are joined with "\n" above but split on
        # db_row_sep here -- confirm the two always agree.
        data_list = whole_data.getvalue().split(self.db_row_sep)[:self.limit]
        del whole_data
        data = cStringIO.StringIO()
        data.write(self.db_row_sep.join(data_list))
        del data_list
    else:
        data = whole_data

    if self.dest == conf.DATA_TO_ONE_CLIENT:
        # Send the data to a randomly chosen client.
        random_num = random.randint(0, len(self.client_sock) - 1)
        addr = self.client_sock[random_num]
        data_s = data.getvalue()
        sock = socket(AF_INET, SOCK_STREAM)
        try:
            sock.connect(addr)
            # sendall: a plain send() may transmit only part of a large result.
            sock.sendall("%10s%s" % (len(data_s), data_s))
            reply = sock.recv(10)
            assert reply == "OK"
        finally:
            sock.close()

    elif self.dest == conf.DATA_TO_DB:
        self.data = data
        row_sep = self.db_row_sep
        ParaLiteLog.info("proc_select: load data start")

        # Describe the pending load to the master.
        t_size = len(data.getvalue())
        sep = conf.SEP_IN_MSG
        tag = conf.LOAD_FROM_API
        if row_sep is None or row_sep == "\n":
            temp_sep = "NULL"
        else:
            temp_sep = row_sep
        msg = sep.join(
            str(s) for s in [conf.REQ, self.cqid, gethostname(),
                             self.my_port, self.dest_db,
                             self.dest_table, t_size,
                             tag, self.fashion, temp_sep, "0"])
        so_master = socket(AF_INET, SOCK_STREAM)
        try:
            so_master.connect((self.master_name, self.master_port))
            so_master.sendall("%10s%s" % (len(msg), msg))
        finally:
            so_master.close()
def proc_drop(self, exp, target_db):
    """Execute the statement ``exp`` against every database in ``target_db``.

    Args:
        exp: SQL text to execute.
        target_db: iterable of SQLite database paths.

    A ``sqlite3.OperationalError`` is reported through ``es`` and logged, and
    the remaining databases are not processed; it is not re-raised.  Each
    connection is closed even when the statement fails.
    """
    try:
        for db in target_db:
            conn = sqlite3.connect(db)
            try:
                conn.cursor().execute(exp)
                conn.commit()
            finally:
                # Release the database handle on the error path as well.
                conn.close()
    except sqlite3.OperationalError as e:
        es("%s: %s" % (gethostname(), " ".join(e.args)))
        ParaLiteLog.info(traceback.format_exc())
def step(self, value):
    """Multiply ``self.product`` by ``value``.

    Text input is converted first: ``unicode`` is encoded as ASCII and
    ``str`` is parsed as a base-10 integer.  Other values are used as-is.

    Raises:
        Exception: wrapping the formatted traceback of any failure (the
            traceback is logged as well).
    """
    # NOTE(review): presumably the step() of the "mul" aggregate registered
    # with sqlite3.create_aggregate -- confirm against the enclosing class.
    try:
        newvalue = value
        if isinstance(value, unicode):
            newvalue = value.encode("ascii")
        if isinstance(newvalue, str):
            newvalue = int(newvalue)
        self.product *= newvalue
    except Exception:
        # Was a bare ``except:``, which also converted KeyboardInterrupt and
        # SystemExit into a generic Exception.
        ParaLiteLog.info(traceback.format_exc())
        raise Exception(traceback.format_exc())
def proc_create(self, exp, target_db):
    """Execute the statement ``exp`` against every database in ``target_db``.

    The directory holding each database file is created first when it does
    not exist yet.

    Args:
        exp: SQL text to execute.
        target_db: iterable of SQLite database paths.

    Raises:
        Exception: wrapping a ``sqlite3.OperationalError`` raised while
            connecting or executing (the traceback is logged first).
    """
    try:
        for db in target_db:
            # dirname() is "" for a bare file name; slicing with rfind() == -1
            # used to yield the path minus its last character and create that
            # as a directory.
            parent = os.path.dirname(db)
            if parent and not os.path.exists(parent):
                os.makedirs(parent)
            conn = sqlite3.connect(db)
            try:
                conn.cursor().execute(exp)
                conn.commit()
            finally:
                # Release the database handle on the error path as well.
                conn.close()
    except sqlite3.OperationalError as e:
        ParaLiteLog.info(traceback.format_exc())
        raise Exception("ERROR: in proc_create: %s: %s" % (
            gethostname(), " ".join(e.args)))
def main():
    """Command-line entry point for the orderby operator.

    Expects exactly six arguments: master name, master port, cqid, opid,
    local port and log directory.  With any other argument count the process
    exits with status 1.  The log directory is created if missing, logging
    is initialised, and ``OrderbyOp.start`` is run.
    """
    argv = sys.argv
    if len(argv) != 7:
        sys.exit(1)

    proc = OrderbyOp()
    proc.master_name = argv[1]
    proc.master_port = int(argv[2])
    proc.cqid = argv[3]
    proc.opid = argv[4]
    proc.my_port = int(argv[5])
    proc.log_dir = argv[6]

    if not os.path.exists(proc.log_dir):
        os.makedirs(proc.log_dir)

    # One log file per host and start time.
    cur_time = time.strftime("%Y-%m-%d-%H-%M-%S")
    log_path = "%s/orderby-%s-%s.log" % (proc.log_dir, gethostname(), cur_time)
    ParaLiteLog.init(log_path, logging.DEBUG)
    ParaLiteLog.info("--orderby node %s on %s is started" % (
        proc.opid, gethostname()))
    proc.start()
def main():
    """Command-line entry point for the SQL operator.

    Expects exactly six arguments: master name, master port, cqid, opid,
    local port and log directory.  With any other argument count the process
    exits with status 1.  The log directory is created if missing, logging
    is initialised, and ``SqlOp.sql_proc`` is run.
    """
    argv = sys.argv
    if len(argv) != 7:
        sys.exit(1)

    proc = SqlOp()
    proc.master_name = argv[1]
    proc.master_port = int(argv[2])
    proc.cqid = argv[3]
    proc.opid = argv[4]
    proc.my_port = int(argv[5])
    proc.log_dir = argv[6]

    if not os.path.exists(proc.log_dir):
        os.makedirs(proc.log_dir)

    # One log file per host, query and operator.
    log_path = "%s/sql-%s-%s-%s.log" % (
        proc.log_dir, gethostname(), proc.cqid, proc.opid)
    ParaLiteLog.init(log_path, logging.DEBUG)
    ParaLiteLog.info("--sql node %s on %s is started" % (
        proc.opid, gethostname()))
    proc.sql_proc()
def get_data_by_blocksize(self, jobid, bksize):
    """Read roughly ``bksize`` characters from the current reader.

    Returns ``None`` when there is no current reader.  When the read comes
    back short, or is exactly ``bksize`` long and that equals
    ``self.job_data[jobid]``, the reader is closed, replaced by
    ``self.get_next_reader()`` and the data is returned unmodified.

    Otherwise the block is adjusted to a record boundary:

    * row separator ``"\\n"``: the rest of the current line is appended;
    * any other separator: everything up to the last separator is returned,
      prefixed with ``self.left_ds``, and the remainder is stored in
      ``self.left_ds`` for the next call.
    """
    if self.reader is None:
        return None

    data = self.reader.read(bksize)
    read_size = len(data) if data else 0

    # job_data[jobid] is only looked up for a full-size read.
    reader_done = read_size < bksize or (
        read_size == bksize and read_size == self.job_data[jobid])
    if reader_done:
        if self.reader is not None:
            self.reader.close()
            self.reader = None
        self.reader = self.get_next_reader()
        # NOTE(review): self.left_ds is not prepended on this path -- confirm
        # a leftover fragment cannot be pending here.
        return data

    if self.db_row_sep == "\n":
        if not data.endswith("\n"):
            # Complete the record that the block boundary cut in two.
            tail = self.reader.readline()
            if tail:
                data += tail
        return data

    if not data:
        return None

    # NOTE(review): rfind() returns -1 when the block holds no separator,
    # which makes the slices below drop/duplicate characters -- confirm
    # blocks always contain at least one separator.
    pos = data.rfind(self.db_row_sep)
    ParaLiteLog.info(pos)
    send_ds = self.left_ds + data[0:pos]
    self.left_ds = data[pos + len(self.db_row_sep):]
    return send_ds
def scan_data_queue(self):
    """Drain ``self.queue`` and write every payload to its database.

    Each queued item starts with a 10-character field holding the length of
    the database name, followed by the name and then the data.  The loop
    ends when ``conf.END_TAG`` is dequeued.  Any failure is logged, reported
    through ``es`` and followed by ``sys.exit(1)``.
    """
    # NOTE(review): start() runs this in a separate thread, where sys.exit()
    # only raises SystemExit in that thread -- confirm that is intended.
    for item in iter(self.queue.get, conf.END_TAG):
        try:
            name_end = 10 + int(item[0:10].strip())
            target_db = item[10:name_end]
            payload = item[name_end:]
            self.write_to_db(target_db, payload, len(payload))
            del payload
        except Exception:
            ParaLiteLog.info(traceback.format_exc())
            es("in write_to_db: %s" % (traceback.format_exc()))
            sys.exit(1)
    ParaLiteLog.info("SCAN DATA QUEUE : END")
def handle_read(self, ev): data = ev.data if data == conf.END_TAG: ParaLiteLog.info("receive: END_TAG") self.is_running = False self.queue.put(conf.END_TAG) elif data.startswith(conf.INFO): m = data.split(conf.SEP_IN_MSG) assert len(m) == 7 if self.table == None: self.table = m[2] if self.db_col_sep == None: self.db_col_sep = m[3] if self.cmd_col_sep == None: self.cmd_col_sep = m[4] if self.cmd_row_sep == None: self.cmd_row_sep = m[5] if self.is_replace == None: self.is_replace = m[6] ParaLiteLog.info("DB_COL_SEP = %s CMD_COL_SEP = %s CMD_ROW_SEP = %s is_replace = %s" % (self.db_col_sep, self.cmd_col_sep, self.cmd_row_sep, self.is_replace)) else: """ TODO: we can control the buffer size here. """ self.queue.put(data) """
def start(self, argument):
    """Run the data-load server.

    Parses ``argument``, initialises logging, starts the queue-scanning
    thread, opens a UNIX-domain and a TCP server socket, registers with the
    master and dispatches accept/read events until ``self.is_running``
    becomes false.  Any exception in the event loop is reported through
    ``es``, logged, and followed by ``sys.exit(1)``.
    """
    stamp_format = '%Y-%m-%d-%H-%M-%S'

    self.parse(argument)
    cur_time = time.strftime(stamp_format)
    ParaLiteLog.init("%s/dload-server-%s-%s.log" % (
        self.log_dir, gethostname(), cur_time), logging.DEBUG)
    ParaLiteLog.info("START")
    ParaLiteLog.info("parse the argumens sucessfully")
    started_at = time.time()  # not read anywhere in this method

    # Background consumer that writes queued data into the databases.
    scan_thd = threading.Thread(target=self.scan_data_queue)
    scan_thd.setDaemon(True)
    scan_thd.start()

    # UNIX-domain listener; the socket file lives in the log directory.
    t = time.strftime(stamp_format)
    self.local_socket = "%s%s%s-%s-%s" % (
        self.log_dir, os.sep, gethostname(), t, "UNIX.d")
    self.iom.create_server_socket(AF_UNIX, SOCK_STREAM, 5, self.local_socket)

    # TCP listener; the port reported by the socket is stored in self.port.
    ch = self.iom.create_server_socket(
        AF_INET, SOCK_STREAM, 5, ("", self.port))
    _host, self.port = ch.ss.getsockname()
    ParaLiteLog.info("global socket addr = %s" % (repr(ch.ss.getsockname())))

    self.register_to_master()

    try:
        while self.is_running:
            ev = self.next_event(None)
            if isinstance(ev, ioman_base.event_accept):
                self.handle_accept(ev)
            elif isinstance(ev, ioman_base.event_read) and ev.data != "":
                self.handle_read(ev)
    except Exception:
        es("in dload_server.py : %s" % traceback.format_exc())
        ParaLiteLog.info(traceback.format_exc())
        sys.exit(1)
def range_data(self):
    """Placeholder for range partitioning: only logs that it is unsupported."""
    notice = "Now RANGE FASHION is not supported..."
    ParaLiteLog.info(notice)
def write_to_db(self, db, data, size):
    """Insert the records contained in ``data`` into ``self.table`` of ``db``.

    Args:
        db: path of the SQLite database.
        data: text holding the records, separated by ``self.cmd_row_sep``
            (``"\\n"`` when that is ``None`` or the string ``"None"``);
            columns are separated by tabs, and the escape sequences ``\\n``
            and ``\\t`` inside a value are turned back into real characters.
        size: size of ``data``; added to the ``self.cur_db`` counters.

    A ``sqlite3.OperationalError`` while inserting is reported through
    ``es``, logged, and followed by ``sys.exit(1)``.  The cursor and the
    connection are closed on every path.
    """
    ParaLiteLog.info("%s: START: size = %s" % (self.write_to_db.__name__, size))
    record_num = 0
    if self.is_replace == "True":
        ParaLiteLog.info("LOAD: when is_replace = True")

    con = sqlite3.connect(db)
    try:
        con.text_factory = str
        cr = con.cursor()
        try:
            if self.cmd_row_sep == "None" or self.cmd_row_sep is None:
                lines = data.split("\n")
            else:
                lines = data.split(self.cmd_row_sep)
            ParaLiteLog.info(len(lines))

            # NOTE(review): columns are always split on "\t" here, not on a
            # configured column separator -- confirm against the sender.
            template = None
            for line in lines:
                if line == "":
                    continue
                x = tuple([s.replace("\\n", "\n").replace("\\t", "\t")
                           for s in line.split("\t")])
                if template is None:
                    # Built once from the first record's column count.
                    questions = ",".join(["?"] * len(x))
                    template = "insert into %s values(%s);" % (
                        self.table, questions)
                try:
                    cr.execute(template, x)
                    record_num += 1
                except sqlite3.OperationalError:
                    es("sqlite3.OperationalError: %s" % traceback.format_exc())
                    ParaLiteLog.info(traceback.format_exc())
                    sys.exit(1)
            ParaLiteLog.info("record_num is %s" % (record_num))
            con.commit()
        finally:
            cr.close()
    finally:
        # Release the database handle on failure paths as well.
        con.close()

    ParaLiteLog.info("%s: FINISH" % (self.write_to_db.__name__))
    self.cur_db.table_added_record += record_num
    self.cur_db.table_added_size += size
    self.cur_db.size += size
    return
def handle_read(self, event):
    """Dispatch one message received by the orderby operator.

    The first 10 characters of ``event.data`` are skipped and the rest is
    split on ``conf.SEP_IN_MSG``; the first field selects the action
    (job arguments, job id, data, end of jobs, persistence request, load
    reply, end of import, exit, node failure).

    Any exception is reported through ``es`` and logged, after which
    ``self.is_running`` and ``self.no_error`` are set to ``False``.
    """
    message = event.data[10:]
    sep = conf.SEP_IN_MSG
    m = message.split(sep)
    try:
        if m[0] == conf.JOB_ARGUMENT:
            self.parse_args(m[1])
            ParaLiteLog.info("parse arguments: FINISH")

        elif m[0] == conf.JOB:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            self.cur_jobid = m[1]

        elif m[0] == conf.DATA:
            # The first two characters of the payload hold the sender id.
            data = m[1][2:]
            self.source_data.append(data)

            # Sort only once every child has delivered its data.
            if not self.is_data_ready(self.source_data, self.num_of_children):
                return

            ParaLiteLog.debug("****SORT DATA****: start")
            s = sum(len(chunk) for chunk in self.source_data)
            ParaLiteLog.debug("source data size: %s" % s)

            s_time = time.time()
            rs_type, rs, t_size = self.sort(self.source_data)
            # Start the next job with an empty *list*: the attribute is
            # appended to above, and it must also exist again when the early
            # return below is taken.
            self.source_data = []
            ParaLiteLog.debug("****SORT DATA****: finish")

            if rs_type is None:
                self.send_status_to_master(self.cur_jobid, conf.PENDING)
                return

            self.total_size += t_size

            # Merge the result of this job into the final result, keyed by
            # position in ``rs``.
            for i, part in enumerate(rs):
                if i not in self.result:
                    self.result[i] = [part]
                else:
                    self.result[i].append(part)

            if rs_type != conf.MULTI_FILE:
                # Spill to disk when the whole data exceeds the limit.
                if self.total_size > self.MAX_SIZE:
                    self.write_data_to_disk()
                    self.result_type = conf.MULTI_FILE

            e_time = time.time()
            self.total_time += e_time - s_time

            self.send_status_to_master(self.cur_jobid, conf.PENDING)

        elif m[0] == conf.JOB_END:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # All jobs are finished.
            self.send_rs_info_to_master(self.total_size, self.total_time)

            if self.dest == conf.DATA_TO_ONE_CLIENT:
                ParaLiteLog.debug("dest = %s" % self.dest)
                self.distribute_data()
                self.send_status_to_master(self.cur_jobid, conf.ACK)
                self.is_running = False
            elif self.dest == conf.DATA_TO_DB:
                self.distribute_data()

        elif m[0] == conf.DATA_PERSIST:
            # Persist the data only when a checkpoint is requested.
            if m[1] == conf.CHECKPOINT:
                self.write_data_to_disk()

        elif m[0] == conf.DLOAD_REPLY:
            reply = sep.join(m[1:])
            ParaLiteLog.info("receive the information from the master")
            ParaLiteLog.debug(reply)

            if len(self.data.getvalue()) != 0:
                dload_client.dload_client().load_internal_buffer(
                    reply, self.dest_table, self.data, self.fashion,
                    self.hash_key, self.hash_key_pos, self.db_col_sep,
                    self.db_row_sep, self.db_col_sep, False, "0",
                    self.log_dir,
                )

            # Tell the master that this client has finished loading.
            client_id = "0"
            msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id])
            so_master = socket(AF_INET, SOCK_STREAM)
            try:
                so_master.connect((self.master_name, self.master_port))
                # sendall: a plain send() may transmit only part of the data.
                so_master.sendall("%10s%s" % (len(msg), msg))
            finally:
                so_master.close()
            ParaLiteLog.debug("sending to master: %s" % (conf.END_TAG))
            ParaLiteLog.debug("----- dload client finish -------")

        elif message == conf.DLOAD_END_TAG:
            ParaLiteLog.debug("---------import finish---------")
            # cur_jobid is a single id string; joining it with " " would
            # insert spaces between its characters.
            self.send_status_to_master(self.cur_jobid, conf.ACK)
            self.is_running = False

        elif m[0] == conf.EXIT:
            self.is_running = False

        elif m[0] == conf.NODE_FAIL:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # message --> NODE_FAIL:FAILED_NODE:REPLICA_NODE
            failed_node, replica_node = m[1:3]
            self.failed_node.append(failed_node)
            if replica_node != "" and replica_node == gethostname():
                # Load the replica data for the failed node.
                self.recovery_data(self.replica_result, replica_node)
            ParaLiteLog.debug("Finish to handle node failure message")

    except Exception:
        es(traceback.format_exc())
        ParaLiteLog.info(traceback.format_exc())
        self.is_running = False
        self.no_error = False
msg = sep.join([conf.DATA, "%2s%s" % (self.opid, data)]) if destnode == gethostname(): # use local socket addr = self.p_node[destnode][1] t = AF_UNIX else: addr = (destnode, self.p_node[destnode][0]) t = AF_INET self.send_data_to_node(msg, t, addr) ParaLiteLog.debug( "send data susscufully %s %s --> %s" % ( self.opid, gethostname(), destnode)) elif m[0] == conf.DLOAD_REPLY: reply = sep.join(m[1:]) ParaLiteLog.info("receive the information from the master") ParaLiteLog.debug(reply) if len(self.data.getvalue()) != 0: dload_client.dload_client().load_internal_buffer( reply, self.dest_table, self.data, self.fashion, self.hash_key, self.hash_key_pos, self.db_col_sep, self.db_row_sep, self.db_col_sep, False, "0", self.log_dir) # send END_TAG to the master client_id = "0" msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id]) so_master = socket(AF_INET, SOCK_STREAM) so_master.connect((self.master_name, self.master_port)) so_master.send("%10s%s" % (len(msg), msg)) so_master.close()
def distribute_data(self):
    """Ship the accumulated SQL result to its destination.

    All non-empty buffers in ``self.result`` are concatenated (one ``"\\n"``
    after each).  When ``self.distinct`` is set duplicate records are
    removed, and when ``self.limit`` is not ``-1`` only the first
    ``self.limit`` records are kept.  Then:

    * ``conf.DATA_TO_DB``: the buffer is kept in ``self.data`` and a load
      request describing it is sent to the master;
    * ``conf.DATA_TO_ONE_CLIENT``: the data is sent, with a 10-character
      length prefix, to a randomly chosen address from ``self.client_sock``
      and an ``"OK"`` reply is expected.

    Raises:
        AssertionError: if the client does not answer ``"OK"``.
    """
    # Handle the limit condition: get the first N records.
    # E.g. select ... limit 10: the master first decides the limit number
    # for each process and sets each process's limit to the post-limit value.
    whole_data = cStringIO.StringIO()
    for i in self.result:
        for csio in self.result[i]:
            d = csio.getvalue().strip()
            if not d:
                continue
            whole_data.write(d)
            whole_data.write("\n")

    if self.distinct or self.limit != -1:
        # NOTE(review): buffers are joined with "\n" above but split on
        # db_row_sep here -- confirm the two always agree.
        data_list = whole_data.getvalue().split(self.db_row_sep)
        del whole_data
        if self.distinct:
            # Order-preserving de-duplication.  A plain set() cannot be
            # sliced by the limit below and would also scramble row order.
            seen = set()
            unique_rows = []
            for row in data_list:
                if row not in seen:
                    seen.add(row)
                    unique_rows.append(row)
            data_list = unique_rows
        if self.limit != -1:
            data_list = data_list[:self.limit]
        data = cStringIO.StringIO()
        data.write(self.db_row_sep.join(data_list))
        del data_list
    else:
        data = whole_data

    if self.dest == conf.DATA_TO_DB:
        self.data = data
        row_sep = self.db_row_sep
        ParaLiteLog.info("proc_select: load data start")

        # Describe the pending load to the master.
        t_size = len(data.getvalue())
        sep = conf.SEP_IN_MSG
        tag = conf.LOAD_FROM_API
        if row_sep is None or row_sep == "\n":
            temp_sep = "NULL"
        else:
            temp_sep = row_sep
        msg = sep.join(
            str(s) for s in [conf.REQ, self.cqid, gethostname(),
                             self.my_port, self.dest_db,
                             self.dest_table, t_size,
                             tag, self.fashion, temp_sep, "0"])
        so_master = socket(AF_INET, SOCK_STREAM)
        try:
            so_master.connect((self.master_name, self.master_port))
            # sendall: a plain send() may transmit only part of the data.
            so_master.sendall("%10s%s" % (len(msg), msg))
        finally:
            so_master.close()

    elif self.dest == conf.DATA_TO_ONE_CLIENT:
        random_num = random.randint(0, len(self.client_sock) - 1)
        addr = self.client_sock[random_num]
        data_s = data.getvalue()
        sock = socket(AF_INET, SOCK_STREAM)
        try:
            sock.connect(addr)
            ParaLiteLog.info("DATA SIZE = %s" % len(data_s))
            sock.sendall("%10s%s" % (len(data_s), data_s))
            reply = sock.recv(10)
            assert reply == "OK"
        finally:
            sock.close()
def proc_select(self, jobid, exp, target_db):
    """Run the SELECT statement ``exp`` and collect its rows as text.

    Args:
        jobid: id of the job; passed to ``write_data_to_disk`` when the
            single-buffer result is spilled.
        exp: SQL text to execute.
        target_db: list holding exactly one SQLite database path.

    Returns:
        A ``(result_type, buffers, total_size)`` tuple.  When the
        destination is another operator and ``self.partition_num > 1`` the
        rows are hash-partitioned on ``self.split_key`` into one buffer per
        partition (``self.MULTI_BUFFER``); otherwise a single buffer is
        returned (``self.SINGLE_BUFFER``).  When ``self.result_type`` says
        the result lives in files, the buffers are written to disk and
        ``None`` is returned in their place.

    Raises:
        AssertionError: if ``target_db`` does not hold exactly one entry.
        Exception: wrapping a ``sqlite3.OperationalError``.
    """
    assert len(target_db) == 1
    cur_db = target_db[0]
    conn = None
    try:
        conn = sqlite3.connect(cur_db)
        conn.text_factory = str

        # Register the user-defined aggregate.
        conn.create_aggregate("mul", 1, mul)

        c = conn.cursor()

        # For test.
        c.execute('pragma temp_store=memory')
        c.execute('pragma cache_size=2073741824')

        ParaLiteLog.info("start to execute sql: %s" % exp)

        col_sep = self.db_col_sep
        row_sep = self.db_row_sep
        num_of_dest = self.partition_num

        if self.dest == conf.DATA_TO_ANO_OP and num_of_dest > 1:
            columns = self.output
            split_key = self.split_key
            assert split_key is not None

            # Partition the data in hash fashion on the split-key columns.
            pos = [columns.index(key) for key in split_key]
            data_part_list = [cStringIO.StringIO()
                              for _ in range(self.partition_num)]
            t_size = 0
            for row in c.execute(exp):
                part_id = abs(hash(self.db_col_sep.join(
                    str(row[p]) for p in pos))) % num_of_dest
                data = col_sep.join(str(s) for s in row)
                data_part_list[part_id].write(data)
                data_part_list[part_id].write(row_sep)

            for part in data_part_list:
                t_size += len(part.getvalue())

            ParaLiteLog.debug("finish to retrieve the result: %s" % t_size)

            if self.result_type == self.MULTI_FILE:
                # Iterate with the partition index: the list used to be
                # indexed with its own buffers, which raises TypeError.
                for partid, part in enumerate(data_part_list):
                    self.write_data_to_disk(partid, part.getvalue())
                del data_part_list
                return self.MULTI_FILE, None, t_size
            else:
                return self.MULTI_BUFFER, data_part_list, t_size

        else:
            csio = cStringIO.StringIO()
            t_size = 0
            size = 0  # size of the data currently held in csio
            for row in c.execute(exp):
                # NOTE: for an aggregation SQL, e.g. "select max(col) from
                # T ...", (None,) is returned when T holds no record.
                if row[0] is None:
                    continue
                data = col_sep.join(str(s) for s in row)
                size += len(data)
                if size >= self.MAX_SIZE:
                    # NOTE(review): this binds a local that is never read;
                    # it looks like ``self.result_type`` was intended, but
                    # that would make this method return ``None`` buffers,
                    # so confirm the caller can handle that before changing.
                    result_type = self.MULTI_FILE
                    self.write_data_to_disk(jobid, csio.getvalue())

                    # Delete all data in csio.
                    csio.truncate(0)
                    t_size += size
                    size = 0
                csio.write(data)
                csio.write(row_sep)
            t_size += len(csio.getvalue())
            ParaLiteLog.debug("finish to retrieve the result: %s" % t_size)

            if self.result_type == conf.MULTI_FILE:
                self.write_data_to_disk(jobid, csio.getvalue())
                del csio
                return conf.MULTI_FILE, None, t_size
            else:
                return self.SINGLE_BUFFER, [csio], t_size

    except sqlite3.OperationalError:
        ParaLiteLog.info(traceback.format_exc())
        raise Exception("%s: QueryExecutionError: %s" % (
            gethostname(), traceback.format_exc()))
    finally:
        # The returned buffers do not depend on the connection.
        if conn is not None:
            conn.close()
def handle_read(self, event): message = event.data[10:] sep = conf.SEP_IN_MSG m = message.split(sep) try: if m[0] == conf.JOB_ARGUMENT: self.parse_args(m[1]) ParaLiteLog.info("parsed structure : \n%s" % str(self.expr)) ParaLiteLog.info("parse arguments: FINISH") if self.is_checkpoint is not None and self.is_checkpoint == conf.CHECKPOINT: # this is a recovery operator # init the persisted result data ParaLiteLog.debug("recovery data: START") self.recovery_data(self.result, gethostname()) ParaLiteLog.debug("recovery data: FINISH") self.send_rs_info_to_master(0, 0) else: self.parse_func() # delete all temporary files for this operator os.system("rm -f %s/%s_%s" % (self.temp_dir, "groupby", self.opid)) elif m[0] == conf.JOB: ParaLiteLog.debug("MESSAGE: %s" % message) self.cur_jobid = m[1] self.job_list.append(m[1]) elif m[0] == conf.DATA: data_id = string.strip(m[1][0:2]) data = m[1][2:] self.source_data.append(data) # aggregate data if not self.is_data_ready(self.source_data, self.num_of_children): return ParaLiteLog.debug("****GROUP DATA****: start") s = 0 for data in self.source_data: s += len(data) ParaLiteLog.debug("source data size : %s" % s) s_time = time.time() rs_type, rs, t_size = self.hash_based_aggregate(self.source_data) ParaLiteLog.debug("****GROUP DATA****: finish") del self.source_data self.total_size += t_size self.source_data = [] # store the result of one job to the final result if len(rs) == 1: if self.dest == conf.DATA_TO_ANO_OP or self.dest == conf.DATA_TO_DB: # dest is AGGR op or ORDER op, use 0 as the key if 0 not in self.result: self.result[0] = rs else: self.result[0].append(rs[0]) else: # dest is UDX op, use jobid as the key self.result[string.atoi(self.cur_jobid)] = rs if self.is_checkpoint == 1: self.write_data_to_disk(self.cur_jobid, rs[0].getvalue()) else: # use partid as the key for i in range(len(rs)): if i not in self.result: self.result[i] = [rs[i]] else: self.result[i].append(rs[i]) # check if the whole data exceeds the LIMITATION if 
rs_type != conf.MULTI_FILE: if self.total_size > self.MAX_SIZE: for dataid in self.result: data = "" for d in self.result[dataid]: data += d self.write_data_to_disk(dataid, data) self.result_type = conf.MULTI_FILE e_time = time.time() self.total_time += (e_time - s_time) self.send_status_to_master(self.cur_jobid, conf.PENDING) elif m[0] == conf.JOB_END: ParaLiteLog.debug("MESSAGE: %s" % message) # all jobs are finished self.send_rs_info_to_master(self.total_size, self.total_time) # distribute data if self.dest == conf.DATA_TO_ONE_CLIENT: self.distribute_data() self.send_status_to_master(" ".join(self.job_list), conf.ACK) self.is_running = False else self.dest == conf.DATA_TO_DB: self.distribute_data() elif m[0] == conf.DATA_PERSIST: # if the data is requried to be persisted or not ParaLiteLog.debug("MESSAGE: %s" % message) self.process_ck_info(m)
sock.close() def proc_drop(self, exp, target_db): try: for db in target_db: conn = sqlite3.connect(db) c = conn.cursor() c.execute(exp) conn.commit() conn.close() except sqlite3.OperationalError, e: es("%s: %s" % (gethostname(), " ".join(e.args))) ParaLiteLog.info(traceback.format_exc()) except Exception, e: es(traceback.format_exc()) ParaLiteLog.info(traceback.format_exc()) def proc_create(self, exp, target_db): try: # first of all, check if the directory holds database exists or not for db in target_db: parent = db[0:db.rfind(os.sep)] if not os.path.exists(parent): os.makedirs(parent) conn = sqlite3.connect(db) c = conn.cursor() c.execute(exp) conn.commit() conn.close() except sqlite3.OperationalError, e: ParaLiteLog.info(traceback.format_exc())
def load_internal_buffer(self, reply, table, buf, fashion, key, key_pos,
                         db_col_sep, row_sep, col_sep, is_replace,
                         client_id, LOG_DIR):
    """Distribute the contents of ``buf`` to the data nodes named in ``reply``.

    ``reply`` is the master's answer, four ``#``-separated fields:

        nodes # sub_dbs # chunk_num # replica_info

    * nodes: comma-separated entries whose fields are separated by
      ``conf.SEP_IN_MSG``:
        n1:p1:l1 , n2:p2:l2 , ...              for HASH_FASHION
        n1:p1:l1:s1:num , n2:p2:l2:s2:num ...  for ROUND_ROBIN
        (TBD for RANGE_FASHION)
    * sub_dbs: db_1_1 , db_1_2 , db_2_1 , ...
    * replica_info: "db_1_1 db_1_1_r_1 node1 , db_1_2 db_1_2_r_1 node2 , ..."

    Args:
        reply: the master's answer described above.
        table: name of the destination table.
        buf: buffer object (``getvalue``/``read``/``readline``/``seek``)
            holding the data to load.
        fashion: partitioning fashion; anything other than hash, replicate
            or range is handled as round-robin.
        key: not used by this method.
        key_pos: positions of the hash key, passed to ``hash_data_buffer``.
        db_col_sep: stored in ``self.db_col_sep``.
        row_sep: record separator of the data (``None`` or ``"\\n"`` selects
            the line-based splitting).
        col_sep: column separator of the data.
        is_replace: forwarded to ``send_to_node``.
        client_id: not used by this method.
        LOG_DIR: not used by this method.
    """
    ParaLiteLog.info("load_internal: START")
    ParaLiteLog.info("row separator = %s col separator = %s" % (row_sep, col_sep) )
    self.db_col_sep = db_col_sep
    total_size = len(buf.getvalue())

    mm = reply.split("#")
    ParaLiteLog.info("receive the information from the master %s" % mm)
    # Drop empty entries so that an empty node list is really empty
    # ("".split(",") yields [""], which used to fail while parsing below).
    nodes = [entry for entry in mm[0].split(",") if entry]
    sub_dbs = mm[1].split(",")
    chunk_num = int(mm[2])
    replica = mm[3]

    replica_info = {}  # {db_name : {replica_db_name : node}}
    if replica != "":
        for ll in replica.split(","):
            lll = ll.split(" ")
            if lll[0] not in replica_info:
                replica_info[lll[0]] = {}
            replica_info[lll[0]][lll[1]] = lll[2]

    ParaLiteLog.info(nodes)

    node_addr = {}  # {node : addr}
    for node in nodes:
        m = node.split(conf.SEP_IN_MSG)
        if m[0] == gethostname():
            # The third field is used as the address for the local node.
            addr = m[2]
        else:
            addr = (m[0], int(m[1]))
        node_addr[m[0]] = addr

    thds = []

    def send_async(db, ds):
        """Start one sender thread for ``db`` and one per replica of it."""
        targets = [(db, db.split("_")[-3])]
        if db in replica_info:
            for rdb in replica_info[db]:
                targets.append((rdb, replica_info[db][rdb]))
        for target_db, target_node in targets:
            thd = threading.Thread(
                target=self.send_to_node,
                args=(target_db, table, ds, node_addr[target_node],
                      row_sep, col_sep, is_replace))
            thd.setDaemon(True)
            thd.start()
            thds.append(thd)

    if nodes == []:
        ParaLiteLog.info("there is no data to load")

    elif fashion == conf.HASH_FASHION:
        ParaLiteLog.info(fashion)
        # db_buf = {db_name : buffer_of_data}
        db_buf = self.hash_data_buffer(buf, key_pos, nodes, row_sep,
                                       col_sep, chunk_num, sub_dbs)
        for db in db_buf:
            data = db_buf[db].getvalue()
            node = db.split("_")[-3]
            self.send_to_node(db, table, data, node_addr[node],
                              row_sep, col_sep, is_replace)
            if db in replica_info:
                for rdb in replica_info[db]:
                    node = replica_info[db][rdb]
                    self.send_to_node(rdb, table, data, node_addr[node],
                                      row_sep, col_sep, is_replace)

    elif fashion == conf.REPLICATE_FASHION:
        # NOTE(review): ``files`` is not defined anywhere in this method, so
        # this branch raises NameError -- confirm what should be passed.
        self.replicate_data(table, files, total_size, nodes)

    elif fashion == conf.RANGE_FASHION:
        self.range_data()

    else:
        num_of_db = len(nodes) * chunk_num
        if row_sep is not None and row_sep != "\n":
            lines = buf.getvalue().split(row_sep)
            if lines[-1] == "":
                lines.pop()

            l = len(lines)
            # Records per database, rounded up.
            num_each = l // num_of_db
            if l % num_of_db != 0:
                num_each += 1

            for i in range(num_of_db):
                first = i * num_each
                last = min(first + num_each, l)
                send_async(sub_dbs[i], row_sep.join(lines[first:last]))
        else:
            buf.seek(0)
            for i in range(num_of_db):
                node_id = i // chunk_num
                size = int(
                    nodes[node_id].split(conf.SEP_IN_MSG)[3]) // chunk_num
                ParaLiteLog.info("start to get data as bk: %s" % (size))
                ds = buf.read(size)
                if ds is None:
                    # The for loop still advances here; the former while
                    # loop skipped its increment and would never finish.
                    ParaLiteLog.info("really get data as bk: 0")
                    continue
                if not ds.endswith("\n"):
                    # Complete the record cut by the block boundary.
                    ds += buf.readline()
                ParaLiteLog.info("really get data as bk: %s" % (len(ds)))
                send_async(sub_dbs[i], ds)

        for thd in thds:
            thd.join()
def handle_read(self, event):
    """Handle one message delivered by the event loop.

    The first 10 characters of ``event.data`` are skipped (messages sent
    from this method are framed as ``"%10s%s" % (len(msg), msg)``); the
    remainder is split on ``conf.SEP_IN_MSG`` and dispatched on its first
    field, or on the whole text for the bare END_TAG / DLOAD_END_TAG /
    EXIT messages.

    Nothing is returned.  Any exception raised while handling the message
    is reported through ``es``, logged, and stops the operator by setting
    ``self.is_running`` and ``self.no_error`` to False.
    """
    message = event.data[10:]
    sep = conf.SEP_IN_MSG
    m = message.split(sep)
    try:
        if m[0] == conf.DATA_END:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # all data has been dispatched to the parent nodes
            self.send_status_to_master(" ".join(self.job_data), conf.ACK)
            ParaLiteLog.debug("notify ACK to master")
            self.is_running = False

        elif message == conf.END_TAG:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            self.send_status_to_master(" ".join(self.job_data), conf.ACK)
            self.is_running = False

        elif message == conf.DLOAD_END_TAG:
            ParaLiteLog.debug("---------import finish---------")
            self.send_status_to_master(" ".join(self.job_data), conf.ACK)
            self.is_running = False

        elif message == conf.EXIT:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            self.is_running = False

        elif m[0] == conf.JOB_ARGUMENT:
            self.parse_args(m[1])
            ParaLiteLog.info("parse arguments: FINISH")
            # init the persisted result data
            if (self.is_checkpoint is not None
                    and self.is_checkpoint == conf.CHECKPOINT):
                # this is a recovery operator
                ParaLiteLog.debug("recovery data: START")
                self.recovery_data(self.result, gethostname())
                ParaLiteLog.debug("recovery data: FINISH")
                self.send_rs_info_to_master(0, 0)
            else:
                # delete all temporary files for this operator
                os.system("rm -f %s/%s_%s" % (self.temp_dir, "sql", self.opid))

        elif m[0] == conf.JOB:
            self.ex_s_time = time.time()
            self.ex_w_time = 0
            ParaLiteLog.debug("MESSAGE: %s" % message)
            s_time = time.time()
            jobid = m[1]
            target_db = m[2].split()
            exp = self.expression
            statement = exp.lower()
            ParaLiteLog.debug("*****JOB %s******:start" % jobid)

            # FAULT TOLERANCE: a job id we already know is a failed job being
            # re-run, so its old result value is cleared first.
            if jobid in self.job_data:
                if self.dest == conf.DATA_TO_ANO_OP and self.partition_num > 1:
                    for partid in self.result:
                        pos = self.job_list.index(jobid)
                        self.result[partid][pos] = ""
                else:
                    # NOTE(review): the normal path below stores results
                    # under 0 or int(jobid), not the string jobid -- confirm
                    # which key the re-run path is meant to overwrite.
                    self.result[jobid] = ""

            if statement.startswith("select"):
                # selection task:
                #   (1) execute sql
                #   (2) notify the result to the master
                #   (3) wait for the DATA_PERSIST message from the master
                #   (4) persist data if so
                #   (5) notify ACK to the master
                ParaLiteLog.info("proc_select: START")
                st_time = time.time()
                rs_type, rs, t_size = self.proc_select(jobid, exp, target_db)
                et_time = time.time()
                ParaLiteLog.debug("Job %s cost time %s second"
                                  % (jobid, (et_time - st_time)))

                # FAULT TOLERANCE: a re-run job only replaces its old slot
                # and reports PENDING; no bookkeeping is repeated.
                if jobid in self.job_data:
                    if (self.dest == conf.DATA_TO_ANO_OP
                            and self.partition_num > 1):
                        for partid in self.result:
                            pos = self.job_list.index(jobid)
                            self.result[partid][pos] = rs[partid]
                    else:
                        self.result[jobid] = rs
                    self.send_status_to_master(jobid, conf.PENDING)
                    return

                self.job_data[jobid] = t_size
                self.job_list.append(jobid)
                self.total_size += t_size

                # store the result of one job to the final result
                if len(rs) == 1:
                    if self.dest == conf.DATA_TO_ANO_OP:
                        # dest is AGGR op or ORDER op, use 0 as the key
                        if 0 not in self.result:
                            self.result[0] = rs
                        else:
                            self.result[0].append(rs[0])
                        if self.is_checkpoint == 1:
                            self.write_data_to_disk(0, rs[0].getvalue())
                    else:
                        # dest is UDX op, use jobid as the key
                        self.result[int(jobid)] = rs
                        if self.is_checkpoint == 1:
                            self.write_data_to_disk(0, rs[0].getvalue())
                else:
                    # use partid as the key
                    for i in range(len(rs)):
                        if i not in self.result:
                            self.result[i] = [rs[i]]
                        else:
                            self.result[i].append(rs[i])
                    if self.is_checkpoint == 1:
                        for i in range(len(rs)):
                            self.write_data_to_disk(i, rs[i].getvalue())

                # check if the whole data exceeds the LIMITATION
                if rs_type != self.MULTI_FILE:
                    must_spill = (
                        (self.is_checkpoint is not None
                         and self.is_checkpoint == conf.CHECKPOINT)
                        or self.total_size > self.MAX_SIZE)
                    if must_spill:
                        for dataid in self.result:
                            data = "".join(
                                d.getvalue() for d in self.result[dataid])
                            self.write_data_to_disk(dataid, data)
                        self.result_type = self.MULTI_FILE

                # only the first job's duration is recorded as total_time
                if self.total_time == 0:
                    self.total_time = time.time() - s_time
                self.send_status_to_master(jobid, conf.PENDING)

            elif statement.startswith("create"):
                ParaLiteLog.info("proc_create: START")
                ParaLiteLog.info("SQL: %s" % exp)
                self.proc_create(exp, target_db)
                ParaLiteLog.info("proc_create: FINISH")
                self.send_status_to_master(jobid, conf.ACK)
                self.is_running = False

            elif statement.startswith("drop"):
                ParaLiteLog.info("proc_drop: START")
                self.proc_drop(exp, target_db)
                self.send_status_to_master(jobid, conf.ACK)
                self.is_running = False

            ParaLiteLog.debug("*****JOB %s******:finish" % jobid)
            self.ex_w_time += (time.time() - self.ex_s_time)
            self.ex_s_time = 0

        elif m[0] == conf.JOB_END:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # all jobs are finished: record the status of each part of data
            # as {data_id: [(pos_in_result, status)]}
            data_status = {}
            for dataid in self.result:
                data_status[dataid] = [
                    (i, 1) for i in range(len(self.result[dataid]))]
            self.data_status = data_status
            self.reader = self.get_next_reader()
            self.send_rs_info_to_master(self.total_size, self.total_time)

            # distribute data
            if self.dest == conf.DATA_TO_ONE_CLIENT:
                self.distribute_data()
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                self.is_running = False
            elif self.dest == conf.DATA_TO_DB:
                self.distribute_data()

        elif m[0] == conf.DLOAD_REPLY:
            reply = sep.join(m[1:])
            ParaLiteLog.info("receive the information from the master")
            ParaLiteLog.debug(reply)
            if len(self.data.getvalue()) != 0:
                dload_client.dload_client().load_internal_buffer(
                    reply, self.dest_table, self.data, self.fashion,
                    self.hash_key, self.hash_key_pos, self.db_col_sep,
                    self.db_row_sep, self.db_col_sep, False, "0",
                    self.log_dir)

            # send END_TAG to the master
            client_id = "0"
            msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id])
            so_master = socket(AF_INET, SOCK_STREAM)
            try:
                so_master.connect((self.master_name, self.master_port))
                # sendall: a plain send() may transmit only part of the frame
                so_master.sendall("%10s%s" % (len(msg), msg))
            finally:
                # close the socket even when connect/send fails
                so_master.close()
            ParaLiteLog.debug("sending to master: %s" % (conf.END_TAG))
            ParaLiteLog.debug("----- dload client finish -------")

        elif m[0] == conf.DATA_PERSIST:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # whether the data is required to be persisted or not
            self.process_ck_info(m)

        elif m[0] == conf.DATA_DISTRIBUTE:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # send a part of data to the next operator
            # DATA_DISTRIBUTE:partition_num:destnode
            part_id, destnode = m[1:]
            data = self.get_data_by_part_id(self.result, int(part_id))

            # DATA message includes: type:id+data
            # the first 2 chars represent the opid
            msg = sep.join([conf.DATA, "%2s%s" % (self.opid, data)])
            if destnode == gethostname():
                # use local socket
                addr = self.p_node[destnode][1]
                t = AF_UNIX
            else:
                addr = (destnode, self.p_node[destnode][0])
                t = AF_INET
            self.send_data_to_node(msg, t, addr)
            ParaLiteLog.debug("send data susscufully %s %s --> %s"
                              % (self.opid, gethostname(), destnode))

        elif m[0] == conf.DATA_DISTRIBUTE_UDX:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # send data to udx client
            # m[1:] = worker.id:jobid:(node:port | addr):size
            if len(m) == 6:
                w_id, jobid = m[1:3]
                addr = (m[3], int(m[4]))
                t = AF_INET
                bk = int(m[5])
            elif len(m) == 5:
                w_id, jobid = m[1:3]
                addr = m[3]
                t = AF_UNIX
                bk = int(m[4])
            else:
                # Fail with a clear message instead of an unbound-variable
                # error; the handler below treats both the same way.
                raise ValueError(
                    "malformed DATA_DISTRIBUTE_UDX message: %s" % message)

            data = self.get_data_by_blocksize(jobid, bk)
            if not data:
                # if we don't send something here, udx will not send KAL
                # again, and then they will not receive data again, the
                # whole process will be blocked for ever
                msg = sep.join([conf.DATA, "EMPTY"])
            else:
                msg = sep.join([conf.DATA, data])
            self.send_data_to_node(msg, t, addr)

        elif m[0] == conf.DATA_REPLICA:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # message --> DATA_REPLICA:DATANODE:DATAID:DATA
            datanode, dataid = m[1:3]
            f_name = self.get_file_name_by_data_id(gethostname(), dataid)
            # "wa" is not a valid open() mode; "w" is used instead
            # (presumably what the platform treated it as -- confirm that
            # append was not intended).
            # NOTE(review): the layout documented above puts DATA at m[3],
            # yet m[4] is written -- confirm against the sender.
            with open(f_name, "w") as fr:
                fr.write(m[4])

        elif m[0] == conf.NODE_FAIL:
            ParaLiteLog.debug("MESSAGE: %s" % message)
            # message --> NODE_FAIL:FAILED_NODE:REPLICA_NODE
            failed_node, replica_node = m[1:3]
            self.failed_node.append(failed_node)
            if replica_node == gethostname():
                # load replica data for the failed node
                self.recovery_data(self.replica_result, replica_node)

    except Exception:
        es("in sql_proc : %s" % traceback.format_exc())
        ParaLiteLog.info(traceback.format_exc())
        self.is_running = False
        self.no_error = False
def load_internal_file(self, reply, opt, db_col_sep, LOG_DIR):
    """Read the files named by ``opt`` and send their content to the data nodes.

    ``reply`` is the master's answer, formatted as
    ``nodes # sub_dbs # chunk_num # replica_info`` where (| is SEP_IN_MSG):

      nodes:        n1 : p1|l1 , n2 : p2|l2 , ...          (HASH_FASHION)
                    n1 : p1|l1|s1|num , n2 : p2|l2|s2|num  (ROUND_ROBIN)
                    TBD                                    (RANGE_FASHION)
      sub_dbs:      comma-separated database names; the node name is the
                    third "_"-separated field from the end
      replica_info: db_1_1 db_1_1_r_1 node1 , db_1_2 db_1_2_r_1 node2 , ...

    ``opt`` supplies ``table``, ``files``, ``col_sep``, ``row_sep``,
    ``fashion``, ``key_pos`` and ``replace``.  ``db_col_sep`` is stored on
    ``self.db_col_sep``.  ``LOG_DIR`` is accepted but not used here.

    Data is read block by block through ``self.get_data_as_bk`` and handed
    to ``self.send_to_node`` on daemon threads, which are joined before
    returning.  With a custom row separator, blocks are cut at the last
    separator and the tail is carried into the next block; whatever is left
    at end of input is sent as a final piece.

    Raises:
        Exception: any failure after the first file is opened is logged and
            re-raised as a plain ``Exception`` carrying the formatted
            traceback.
    """
    ParaLiteLog.info("load_internal_file: START")
    table = opt.table
    files = opt.files
    col_sep = opt.col_sep
    row_sep = opt.row_sep
    fashion = opt.fashion
    key_pos = opt.key_pos
    is_replace = opt.replace
    self.db_col_sep = db_col_sep
    for f in files:
        self.files[f] = 1
    self.file_reader = open(self.get_next_file(), "rb")
    try:
        mm = reply.split("#")
        # "".split(",") yields [""], so drop empty entries to make the
        # "no data" case below reachable.
        nodes = [entry for entry in mm[0].split(",") if entry]
        sub_dbs = mm[1].split(",")
        chunk_num = int(mm[2])
        replica = mm[3]

        replica_info = {}  # {db_name : {replica_db_name : node}}
        if replica != "":
            for whole_re in replica.split(","):
                parts = whole_re.split(" ")
                replica_info.setdefault(parts[0], {})[parts[1]] = parts[2]

        local_host = gethostname()
        node_addr = {}    # {node : addr}
        node_fields = {}  # {node : split description of that node}
        for entry in nodes:
            fields = entry.split(conf.SEP_IN_MSG)
            if fields[0] == local_host:
                # local node: third field is used as the address
                addr = fields[2]
            else:
                addr = (fields[0], int(fields[1]))
            node_addr[fields[0]] = addr
            node_fields[fields[0]] = fields

        thds = []
        custom_row_sep = row_sep is not None and row_sep != "\n"

        def send_async(db_name, payload, address):
            # Ship one payload on its own daemon thread and remember it.
            worker = threading.Thread(
                target=self.send_to_node,
                args=(db_name, table, payload, address,
                      row_sep, col_sep, is_replace))
            worker.setDaemon(True)
            worker.start()
            thds.append(worker)

        def send_hashed(db_buf):
            # Primary copies go out on threads; replica copies are sent
            # synchronously, as the original code did.
            for db in db_buf:
                data = db_buf[db].getvalue()
                send_async(db, data, node_addr[db.split("_")[-3]])
                for rdb, rnode in replica_info.get(db, {}).items():
                    self.send_to_node(rdb, table, data, node_addr[rnode],
                                      row_sep, col_sep, is_replace)

        def send_with_replicas(db, payload, announce=False):
            # Primary and every replica get the same payload, each on its
            # own thread.
            send_async(db, payload, node_addr[db.split("_")[-3]])
            for rdb, rnode in replica_info.get(db, {}).items():
                if announce:
                    ParaLiteLog.info(rdb)
                send_async(rdb, payload, node_addr[rnode])

        def block_size_for(db):
            # Per-node size share, capped at DATA_MAX_SIZE.  The size is
            # looked up for the node owning this db instead of relying on
            # a variable left over from the address loop.
            fields = node_fields[db.split("_")[-3]]
            return min(int(fields[3]) // chunk_num + 1, DATA_MAX_SIZE)

        if not nodes:
            ParaLiteLog.info("there is no data to load")

        elif fashion == conf.HASH_FASHION:
            ParaLiteLog.info(fashion)
            if custom_row_sep:
                left_ds = ""  # partial record carried between blocks
                while True:
                    dst = self.get_data_as_bk(DATA_MAX_SIZE)
                    if dst is None:
                        ParaLiteLog.info("really get data as bk: 0")
                        break
                    ParaLiteLog.info("really get data as bk: %s" % (len(dst)))
                    pos = dst.rfind(row_sep)
                    if pos == -1:
                        # no complete record in this block yet
                        left_ds += dst
                        continue
                    ds = left_ds + dst[0:pos]
                    left_ds = dst[pos + len(row_sep):]
                    del dst
                    db_buf = self.hash_data_file(
                        ds, key_pos, nodes, row_sep, col_sep,
                        chunk_num, sub_dbs)
                    ParaLiteLog.debug("hash data finish %s" % len(ds))
                    del ds
                    send_hashed(db_buf)
                if left_ds:
                    # input did not end with a separator: flush last record
                    send_hashed(self.hash_data_file(
                        left_ds, key_pos, nodes, row_sep, col_sep,
                        chunk_num, sub_dbs))
            else:
                while True:
                    ds = self.get_data_as_bk(DATA_MAX_SIZE)
                    if ds is None:
                        ParaLiteLog.info("really get data as bk: 0")
                        break
                    ParaLiteLog.info("really get data as bk: %s" % (len(ds)))
                    db_buf = self.hash_data_file(
                        ds, key_pos, nodes, "\n", col_sep,
                        chunk_num, sub_dbs)
                    # log the size of the first bucket only
                    for db in db_buf:
                        ParaLiteLog.debug(
                            "%s -- > %s" % (db, len(db_buf[db].getvalue())))
                        break
                    send_hashed(db_buf)
                    del db_buf
                    del ds

        elif fashion == conf.REPLICATE_FASHION:
            # NOTE(review): total_size is not defined in this method;
            # confirm where it is expected to come from.
            self.replicate_data(table, files, total_size, nodes)

        elif fashion == conf.RANGE_FASHION:
            self.range_data()

        else:
            # default: deal blocks out to the sub-databases in turn
            num_of_db = len(nodes) * chunk_num
            i = 0
            if custom_row_sep:
                left_ds = ""  # partial record carried between blocks
                while True:
                    db = sub_dbs[i % num_of_db]
                    size = block_size_for(db)
                    ParaLiteLog.info("start to get data as bk: %s" % (size))
                    ds = self.get_data_as_bk(size)
                    if ds is None:
                        ParaLiteLog.info("really get data as bk: 0")
                        break
                    ParaLiteLog.info("really get data as bk: %s" % (len(ds)))
                    pos = ds.rfind(row_sep)
                    if pos == -1:
                        # no complete record in this block yet
                        left_ds += ds
                        continue
                    send_ds = left_ds + ds[0:pos]
                    left_ds = ds[pos + len(row_sep):]
                    # replicas must get the same record-aligned payload
                    # as the primary
                    send_with_replicas(db, send_ds)
                    i += 1
                if left_ds:
                    # input did not end with a separator: flush last record
                    send_with_replicas(sub_dbs[i % num_of_db], left_ds)
            else:
                while True:
                    db = sub_dbs[i % num_of_db]
                    size = block_size_for(db)
                    ParaLiteLog.info("start to get data as bk: %s" % (size))
                    ds = self.get_data_as_bk(size)
                    if ds is None:
                        ParaLiteLog.info("really get data as bk: 0")
                        break
                    ParaLiteLog.info("really get data as bk: %s" % (len(ds)))
                    send_with_replicas(db, ds, announce=True)
                    i += 1
                    del ds

        for thd in thds:
            thd.join()
    except Exception:
        ParaLiteLog.debug(traceback.format_exc())
        raise Exception(traceback.format_exc())
record_num += 1 except sqlite3.OperationalError,e: es("sqlite3.OperationalError: %s" % traceback.format_exc()) ParaLiteLog.info(traceback.format_exc()) sys.exit(1) ParaLiteLog.info("record_num is %s" % (record_num)) con.commit() cr.close() con.close() ParaLiteLog.info("%s: FINISH" % (self.write_to_db.__name__)) self.cur_db.table_added_record += record_num self.cur_db.table_added_size += size self.cur_db.size += size return if self.cmd_row_sep is not None and self.cmd_row_sep != "None" and self.cmd_row_sep != conf.NEW_LINE: ParaLiteLog.info("cmd_row_sep %s" % (self.cmd_row_sep)) ParaLiteLog.info("db = %s" % (db)) try: ParaLiteLog.info("LOAD: insert one by one") con = sqlite3.connect(db) con.text_factory = str cr = con.cursor() lines = data.split(self.cmd_row_sep) template = None for line in lines: if line == "": continue x = tuple([ s for s in string.strip(line).split(self.cmd_col_sep)]) if template is None: questions = ",".join([ "?" ] * len(x)) template = "insert into %s values(%s);" % (self.table, questions) cr.execute(template, x)