Example #1
    def start_load(self, master, cqid, node, database, tag, db_col_sep, opt):
        logging("start to load")
        table = opt.table
        files = opt.files
        fashion = opt.fashion
        key = opt.key
        key_pos = opt.key_pos
        row_sep = opt.row_sep
        col_sep = opt.col_sep
        dload_client.dload_client().load_internal(master, cqid, node, database, table,
                                                  files, 0, fashion, key, key_pos,
                                                  db_col_sep, row_sep, col_sep,
                                                  opt.replace, "0",
                                                  self.defaultconf[conf.LOG_DIR])
        logging("%s: FINISH" % (self.start_load.__name__))
Example #2
    def handle_read(self, event):
        message = event.data[10:]

        m = message.split(conf.SEP_IN_MSG)
        try:
            if m[0] == conf.JOB_ARGUMENT:
                self.parse_args(m[1])
                ParaLiteLog.info("parse arguments: FINISH")

            elif m[0] == conf.JOB:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.cur_jobid = m[1]

            elif m[0] == conf.DATA:
                data_id = string.strip(m[1][0:2])
                data = m[1][2:]
                self.source_data.append(data)

                # sort data
                if not self.is_data_ready(self.source_data, self.num_of_children):
                    return

                ParaLiteLog.debug("****SORT DATA****: start")
                s = 0
                for data in self.source_data:
                    s += len(data)
                ParaLiteLog.debug("source data size: %s" % s)
                s_time = time.time()
                rs_type, rs, t_size = self.sort(self.source_data)
                del self.source_data
                ParaLiteLog.debug("****SORT DATA****: finish")

                if rs_type is None:
                    self.send_status_to_master(self.cur_jobid, conf.PENDING)
                    return

                self.total_size += t_size
                self.source_data = []  # reset as a list: DATA chunks are appended to it above

                # store the result of one job to the final result
                for i in range(len(rs)):
                    if i not in self.result:
                        self.result[i] = [rs[i]]
                    else:
                        self.result[i].append(rs[i])

                if rs_type != conf.MULTI_FILE:
                    # check if the whole data exceeds the LIMITATION
                    if self.total_size > self.MAX_SIZE:
                        self.write_data_to_disk()
                        self.result_type = conf.MULTI_FILE

                e_time = time.time()
                self.total_time += e_time - s_time

                self.send_status_to_master(self.cur_jobid, conf.PENDING)

            elif m[0] == conf.JOB_END:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # all jobs are finished
                self.send_rs_info_to_master(self.total_size, self.total_time)

                # distribute data
                if self.dest == conf.DATA_TO_ONE_CLIENT:
                    ParaLiteLog.debug("dest = %s" % self.dest)
                    self.distribute_data()
                    self.send_status_to_master(self.cur_jobid, conf.ACK)
                    self.is_running = False
                elif self.dest == conf.DATA_TO_DB:
                    self.distribute_data()

            elif m[0] == conf.DATA_PERSIST:
                # check whether the data is required to be persisted
                if m[1] == conf.CHECKPOINT:
                    self.write_data_to_disk()

            elif m[0] == conf.DLOAD_REPLY:
                sep = conf.SEP_IN_MSG
                reply = sep.join(m[1:])
                ParaLiteLog.info("receive the information from the master")
                ParaLiteLog.debug(reply)

                if len(self.data.getvalue()) != 0:
                    dload_client.dload_client().load_internal_buffer(
                        reply,
                        self.dest_table,
                        self.data,
                        self.fashion,
                        self.hash_key,
                        self.hash_key_pos,
                        self.db_col_sep,
                        self.db_row_sep,
                        self.db_col_sep,
                        False,
                        "0",
                        self.log_dir,
                    )

                # send END_TAG to the master
                client_id = "0"
                msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id])
                so_master = socket(AF_INET, SOCK_STREAM)
                so_master.connect((self.master_name, self.master_port))
                so_master.send("%10s%s" % (len(msg), msg))
                so_master.close()
                ParaLiteLog.debug("sending to master: %s" % (conf.END_TAG))
                ParaLiteLog.debug("----- dload client finish -------")

            elif message == conf.DLOAD_END_TAG:
                ParaLiteLog.debug("---------import finish---------")
                self.send_status_to_master(" ".join(self.cur_jobid), conf.ACK)
                self.is_running = False

            elif m[0] == conf.EXIT:
                self.is_running = False

            elif m[0] == conf.NODE_FAIL:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # message --> NODE_FAIL:FAILED_NODE:REPLICA_NODE
                failed_node, replica_node = m[1:3]
                self.failed_node.append(failed_node)
                if replica_node != "" and replica_node == gethostname():
                    # load replica data for the failed node
                    self.recovery_data(self.replica_result, replica_node)
                ParaLiteLog.debug("Finish to handle node failure message")

        except Exception, e:
            es(traceback.format_exc())
            ParaLiteLog.info(traceback.format_exc())
            self.is_running = False
            self.no_error = False
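Example #2 buffers incoming DATA chunks until every child has reported, sorts them, and spills the accumulated result to disk once total_size crosses MAX_SIZE (switching the result type to MULTI_FILE). A standalone sketch of that spill-on-threshold pattern; SpillBuffer and its methods are illustrative names, not ParaLite API:

    import os
    import tempfile

    class SpillBuffer(object):
        """Accumulate string chunks in memory; spill to disk past max_size."""

        def __init__(self, max_size):
            self.max_size = max_size
            self.chunks = []
            self.total = 0
            self.spill_files = []

        def add(self, chunk):
            self.chunks.append(chunk)
            self.total += len(chunk)
            if self.total > self.max_size:
                self.spill()

        def spill(self):
            # write everything buffered so far to a temp file, then reset
            fd, path = tempfile.mkstemp(prefix="spill_")
            f = os.fdopen(fd, "w")
            f.write("".join(self.chunks))
            f.close()
            self.spill_files.append(path)
            self.chunks = []
            self.total = 0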
Example #3
                else:
                    addr = (destnode, self.p_node[destnode][0])
                    t = AF_INET
                self.send_data_to_node(msg, t, addr)
                ParaLiteLog.debug(
                    "send data susscufully   %s %s --> %s" % (
                        self.opid, gethostname(), destnode))

            elif m[0] == conf.DLOAD_REPLY:
                reply = sep.join(m[1:])
                ParaLiteLog.info("receive the information from the master")
                ParaLiteLog.debug(reply)
                
                if len(self.data.getvalue()) != 0:
                    dload_client.dload_client().load_internal_buffer(
                        reply, self.dest_table, self.data, self.fashion, 
                        self.hash_key, self.hash_key_pos, self.db_col_sep, 
                        self.db_row_sep, self.db_col_sep, False, "0", self.log_dir)

                # send END_TAG to the master
                client_id = "0"
                msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id])
                so_master = socket(AF_INET, SOCK_STREAM)
                so_master.connect((self.master_name, self.master_port))
                so_master.send("%10s%s" % (len(msg), msg))
                so_master.close()
                ParaLiteLog.debug("sending to master: %s" % (conf.END_TAG))
                ParaLiteLog.debug("----- dload client finish -------")

            elif message == conf.DLOAD_END_TAG:
                ParaLiteLog.debug("---------import finish---------")
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
Example #4
    def handle_read(self, event):
        message = event.data[10:]

        sep = conf.SEP_IN_MSG
        m = message.split(sep)
        try:
            if m[0] == conf.DATA_END:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # all data has been dispatched to the parent nodes
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                ParaLiteLog.debug("notify ACK to master")                    
                self.is_running = False
                
            elif message == conf.END_TAG:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                self.is_running = False

            elif message == conf.DLOAD_END_TAG:
                ParaLiteLog.debug("---------import finish---------")
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                self.is_running = False

            elif message == conf.EXIT:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.is_running = False
            
            elif m[0] == conf.JOB_ARGUMENT:
                self.parse_args(m[1])
                ParaLiteLog.info("parse arguments: FINISH")
                # init the persisted result data
                if self.is_checkpoint == conf.CHECKPOINT:
                    ParaLiteLog.debug("recovery data: START")
                    # this is a recovery operator
                    self.recovery_data(self.result, gethostname())
                    ParaLiteLog.debug("recovery data: FINISH")
                    self.send_rs_info_to_master(0, 0)
                else:
                    # delete all temporary files for this operator
                    os.system("rm -f %s/%s_%s" % (self.temp_dir, "sql", self.opid))

                ###############################
                # scanner = threading.Thread(target=self.scan_process_queue, args=(self.process_queue, ))
                # scanner.setDaemon(True)
                # scanner.start()
                # self.threads.append(scanner)
                ##########################
            elif m[0] == conf.JOB:
                self.ex_s_time = time.time()
                self.ex_w_time = 0
                ParaLiteLog.debug("MESSAGE: %s" % message)
                s_time = time.time()
                jobid = m[1]
                target_db = m[2].split()
                exp = self.expression
                ParaLiteLog.debug("*****JOB %s******:start" % jobid)
                
                # FAULT TOLERANCE:
                if jobid in self.job_data:
                    # this is a failed job, we should first delete the old result value
                    if self.dest == conf.DATA_TO_ANO_OP and self.partition_num > 1:
                        for partid in self.result:
                            pos = self.job_list.index(jobid)
                            self.result[partid][pos] = ""
                    else:
                        self.result[jobid] = ""
                
                if exp.lower().startswith("select"):
                    """
                    selection task: (1) execute the sql; (2) notify the result
                    to the master; (3) wait for the DATA_PERSIST message from
                    the master; (4) persist the data if so; (5) notify ACK to
                    the master
                    """
                    ParaLiteLog.info("proc_select: START")
                    st_time = time.time()
                    
                    rs_type, rs, t_size = self.proc_select(jobid, exp, target_db)

                    et_time = time.time()
                    ParaLiteLog.debug("Job %s cost time %s second" % (jobid, (et_time - st_time)))
                    # FAULT TOLERANCE:
                    if jobid in self.job_data:
                        # this is a failed job
                        if self.dest == conf.DATA_TO_ANO_OP and self.partition_num > 1:
                            for partid in self.result:
                                pos = self.job_list.index(jobid)
                                self.result[partid][pos] = rs[partid]
                        else:
                            self.result[jobid] = rs
                        self.send_status_to_master(jobid, conf.PENDING)
                        return
                        
                    self.job_data[jobid] = t_size
                    self.job_list.append(jobid)
                    self.total_size += t_size
                    
                    # store the result of one job to the final result
                    if len(rs) == 1:
                        if self.dest == conf.DATA_TO_ANO_OP:
                            # dest is AGGR op or ORDER op, use 0 as the key
                            if 0 not in self.result:
                                self.result[0] = rs
                            else:
                                self.result[0].append(rs[0])

                            if self.is_checkpoint == 1:
                                self.write_data_to_disk(0, rs[0].getvalue())
                        else:
                            # dest is UDX op, use jobid as the key
                            self.result[string.atoi(jobid)] = rs
                            if self.is_checkpoint == 1:
                                self.write_data_to_disk(0, rs[0].getvalue())
                        
                    else:
                        # use partid as the key
                        for i in range(len(rs)):
                            if i not in self.result:
                                self.result[i] = [rs[i]]
                            else:
                                self.result[i].append(rs[i])
                        if self.is_checkpoint == 1:
                            for i in range(len(rs)):
                                self.write_data_to_disk(i, rs[i].getvalue())
                        
                    # check if the whole data exceeds the LIMITATION
                    if rs_type != self.MULTI_FILE:
                        if (self.is_checkpoint == conf.CHECKPOINT
                                or self.total_size > self.MAX_SIZE):
                            for dataid in self.result:
                                data = ""
                                for d in self.result[dataid]:
                                    data += d.getvalue()
                                self.write_data_to_disk(dataid, data)
                            self.result_type = self.MULTI_FILE
                            
                    e_time = time.time()
                    if self.total_time == 0:
                        self.total_time = (e_time - s_time)
                    self.send_status_to_master(jobid, conf.PENDING)

                elif exp.lower().startswith("create"):
                    ParaLiteLog.info("proc_create: START")
                    ParaLiteLog.info("SQL: %s" % exp)                    
                    self.proc_create(exp, target_db)
                    ParaLiteLog.info("proc_create: START")
                    self.send_status_to_master(jobid, conf.ACK)
                    self.is_running = False
                elif exp.lower().startswith("drop"):
                    ParaLiteLog.info("proc_drop: START")            
                    self.proc_drop(exp, target_db)
                    self.send_status_to_master(jobid, conf.ACK)
                    self.is_running = False
                ParaLiteLog.debug("*****JOB %s******:finish" % jobid)
                self.ex_w_time += (time.time() - self.ex_s_time)
                self.ex_s_time = 0

            elif m[0] == conf.JOB_END:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # all jobs are finished
                # create a dictionary to store the status of each part of data
                data_status = {}  # {data_id:[(pos_in_result, status)]}
                for dataid in self.result:
                    if dataid not in data_status:
                        data_status[dataid] = []
                    for i in range(len(self.result[dataid])):
                        data_status[dataid].append((i, 1))
                self.data_status = data_status
                self.reader = self.get_next_reader()
                    
                self.send_rs_info_to_master(self.total_size, self.total_time)

                # distribute data
                if self.dest == conf.DATA_TO_ONE_CLIENT:
                    self.distribute_data()
                    self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                    self.is_running = False
                elif self.dest == conf.DATA_TO_DB:
                    self.distribute_data()
 
            elif m[0] == conf.DLOAD_REPLY:
                reply = sep.join(m[1:])
                ParaLiteLog.info("receive the information from the master")
                ParaLiteLog.debug(reply)
                
                if len(self.data.getvalue()) != 0:
                    dload_client.dload_client().load_internal_buffer(
                        reply, self.dest_table, self.data, self.fashion, 
                        self.hash_key, self.hash_key_pos, self.db_col_sep, 
                        self.db_row_sep, self.db_col_sep, False, "0", self.log_dir)

                # send END_TAG to the master
                client_id = "0"
                msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id])
                so_master = socket(AF_INET, SOCK_STREAM)
                so_master.connect((self.master_name, self.master_port))
                so_master.send("%10s%s" % (len(msg), msg))
                so_master.close()
                ParaLiteLog.debug("sending to master: %s" % (conf.END_TAG))
                ParaLiteLog.debug("----- dload client finish -------")

            elif m[0] == conf.DATA_PERSIST:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # check whether the data is required to be persisted
                self.process_ck_info(m)
                
            elif m[0] == conf.DATA_DISTRIBUTE:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # send a part of data to the next operator
                # DATA_DISTRIBUTE:partition_num:destnode
                part_id, destnode = m[1:]
                data = self.get_data_by_part_id(self.result, string.atoi(part_id))
                
                # DATA message includes: type:id+data
                # the first 2 chars represent the opid
                msg = sep.join([conf.DATA, "%2s%s" % (self.opid, data)])
                if destnode == gethostname():
                    # use local socket
                    addr = self.p_node[destnode][1]
                    t = AF_UNIX
                else:
                    addr = (destnode, self.p_node[destnode][0])
                    t = AF_INET
                self.send_data_to_node(msg, t, addr)
                ParaLiteLog.debug("send data susscufully   %s %s --> %s" % (self.opid, gethostname(), destnode))

            elif m[0] == conf.DATA_DISTRIBUTE_UDX:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # send data to udx client
                # m[1:] = worker.id:jobid:(node:port | addr):size
                
                if len(m) == 6:
                    w_id, jobid = m[1:3]
                    addr = (m[3], string.atoi(m[4]))
                    t = AF_INET
                    bk = string.atoi(m[5])
                elif len(m) == 5:
                    w_id, jobid = m[1:3]
                    addr = m[3]
                    t = AF_UNIX
                    bk = string.atoi(m[4])
                data = self.get_data_by_blocksize(jobid, bk)
                if not data:
                    # if we send nothing here, the udx will never send another
                    # KAL and never receive data again, and the whole process
                    # will block forever
                    msg = sep.join([conf.DATA, "EMPTY"])
                else:
                    msg = sep.join([conf.DATA, data])
                self.send_data_to_node(msg, t, addr)
                
            elif m[0] == conf.DATA_REPLICA:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # message --> DATA_REPLICA:DATANODE:DATAID:DATA
                datanode, dataid = m[1:3]
                f_name = self.get_file_name_by_data_id(gethostname(), dataid)
                # append the replica chunk; the data is m[3] per the layout above
                fr = open(f_name, "a")
                fr.write(m[3])
                fr.close()

            elif m[0] == conf.NODE_FAIL:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # message --> NODE_FAIL:FAILED_NODE:REPLICA_NODE
                failed_node, replica_node = m[1:3]
                self.failed_node.append(failed_node)
                if replica_node == gethostname():
                    # load replica data for the failed node
                    self.recovery_data(self.replica_result, replica_node)
        except Exception, e:
            es("in sql_proc : %s" % traceback.format_exc())
            ParaLiteLog.info(traceback.format_exc())
            self.is_running = False
            self.no_error = False
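The FAULT TOLERANCE branches in Example #4 keep one result slot per (partition, job): job_list records the order in which job results arrived, so a re-executed job overwrites exactly its own stale slot instead of appending a duplicate. A reduced sketch of that bookkeeping; ResultStore is an illustrative name, not ParaLite API:

    class ResultStore(object):
        def __init__(self):
            self.result = {}    # {part_id: [one chunk per job, arrival order]}
            self.job_list = []  # jobids in the order their results arrived

        def store(self, jobid, rs):
            # rs maps part_id -> the chunk this job produced for it
            if jobid in self.job_list:
                # re-run of a failed job: replace its old slots in place
                pos = self.job_list.index(jobid)
                for part_id in rs:
                    self.result[part_id][pos] = rs[part_id]
                return
            self.job_list.append(jobid)
            for part_id in rs:
                self.result.setdefault(part_id, []).append(rs[part_id])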
Example #5
    def distribute(self, data):
        sep = conf.SEPERATOR
        if self.dest == conf.DATA_TO_CLIENTS:
            # This is final result, store them to a file
            dest_file = conf.OUTPUT_DIR + self.dest
            file_handler = io.FileIO(dest_file, 'wb', closefd=True)
            bw = io.BufferedWriter(file_handler, buffer_size=65536)
            for row in data:
                bw.write(row)
                bw.write('\n')
            bw.close()
            addr = (conf.MASTER_NAME, string.atoi(conf.MASTER_PORT))
            sock = socket(AF_INET, SOCK_STREAM)
            sock.connect(addr)
            msg = '%s:%s:%s:%s' % (conf.ACK, conf.DATA_NODE, self.cqid, gethostname())
            sock.send('%10s%s' % (len(msg), msg))
            sock.close()
        elif self.dest == conf.DATA_TO_ANO_OP:
            # This is intermediate data, pipeline it
            num_of_dest = len(self.p_node)
            if num_of_dest == 1:
                port = self.p_node.keys()[0]
                node = self.p_node[port]
                queue = cqueue.cqueue(node, port)
                queue.connect()
                # The first message is the output attrs of this sql
                msg = ''
                for out in self.output:
                    msg += '%s%s' % (out, sep)
                queue.put(msg)
                queue.put(data.getvalue())
                queue.close()
            elif num_of_dest > 1:
                queue_list = []
                """
                for p in self.p_port:
                    queue = cqueue(p.split(':')[0],string.atoi(p.split(':')[1]))
                    queue.connect()
                    # The first message is the output attrs of this sql
                    msg = ''
                    for out in self.output:
                        msg += '%s%s' % (out, sep)
                    queue.put(msg)
                    queue_list.append(queue)
                """
                for port in self.p_node:
                    queue = cqueue.cqueue(self.p_node[port], port)
                    queue.connect()
                    # The first message is the output attrs of this sql
                    msg = ''
                    for out in self.output:
                        msg += '%s%s' % (out, sep)
                    queue.put(msg)
                    queue_list.append(queue)

                if self.split_key is not None:
                    # partition data in hash fashion: route each row to the
                    # queue picked by the hash of its split-key column
                    # (columns and rs, the output column names and the result
                    # rows, are set up elsewhere in this operator)
                    split_key = self.split_key
                    if columns[0].find('.') == -1 and len(split_key.split('.')) == 2:
                        pos = columns.index(split_key.split('.')[1])
                    else:
                        pos = columns.index(split_key)
                    for row in rs:
                        partition_num = abs(hash(row[pos])) % num_of_dest
                        queue_list[partition_num].put('%s%s' % (str(row), sep))
                else:
                    # partition data in range fashion: round-robin the rows
                    # across the destination queues
                    for i in range(len(data)):
                        queue_list[i % num_of_dest].put('%s%s' % (str(data[i]), sep))
                for queue in queue_list:
                    queue.close()
        elif self.dest == conf.DATA_TO_ONE_CLIENT:
            sock = socket(AF_INET, SOCK_STREAM)
            sock.connect(self.client)
            sock.send('%10sDATA' % (4))
            sock.send("%10s%s" % (len(data.getvalue()), data.getvalue()))
            sock.close()
            self.notify_to_master()
        elif self.dest == conf.DATA_TO_DB:
            """
            out_name = "%s%sgroup_%s" % (LOG_DIR, os.sep, self.cqid)
            f = open(out_name, "wb")
            f.write(data.getvalue())
            f.close()
            """
            col_sep = self.db_col_sep
            row_sep = self.db_row_sep
            master = (self.master_name, self.master_port)
            dload_client.dload_client().load_internal(
                master, self.cqid, gethostname(), self.dest_db, self.dest_table,
                data, 1, self.fashion, self.hash_key, self.hash_key_pos,
                col_sep, row_sep, col_sep, False, "0", LOG_DIR)
            self.notify_to_master()
            #os.remove(out_name)
        else:
            random_num = random.randint(0, len(self.client_sock) - 1)
            addr = self.client_sock[random_num]
            sock = socket(AF_INET, SOCK_STREAM)
            sock.connect(addr)
            d = string.strip(data.getvalue())
            sock.send("%10s%s" % (len(d), d))
            sock.close()
            self.notify_to_master()
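Example #5 routes intermediate rows to the parent operators either by hashing the split-key column ("hash fashion") or round-robin ("range fashion"). The same two strategies reduced to a standalone, illustrative helper over plain lists:

    def partition_rows(rows, num_dest, key_pos=None):
        """Split rows into num_dest buckets: by hash of row[key_pos]
        if key_pos is given, otherwise round-robin."""
        buckets = [[] for _ in range(num_dest)]
        for i, row in enumerate(rows):
            if key_pos is not None:
                dest = abs(hash(row[key_pos])) % num_dest
            else:
                dest = i % num_dest
            buckets[dest].append(row)
        return buckets

    # rows with equal keys always land in the same bucket, e.g.:
    # partition_rows([("a", 1), ("b", 2), ("a", 3)], 2, key_pos=0)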