Example #1
    def distribute_data(self):
        whole_data = cStringIO.StringIO()
        for i in self.result:
            for csio in self.result[i]:
                d = string.strip(csio.getvalue())
                if len(d) == 0:
                    continue
                whole_data.write(d)
                whole_data.write("\n")
                del csio
            
        if self.distinct or self.limit != -1:
            data_list = whole_data.getvalue().split(self.db_row_sep)
            del whole_data
        
            if self.distinct:
                # set() drops duplicates; convert back to a list so it can be sliced
                data_list = list(set(data_list))
            if self.limit != -1:
                data_list = data_list[:self.limit]

            data = cStringIO.StringIO()
            data.write(self.db_row_sep.join(str(s) for s in data_list))
            del data_list
        else:
            data = whole_data

        if self.dest == conf.DATA_TO_ONE_CLIENT:
            # send data to a random client
            random_num = random.randint(0, len(self.client_sock) - 1)
            addr = self.client_sock[random_num]
            sock = socket(AF_INET, SOCK_STREAM)
            sock.connect(addr)
            data_s = data.getvalue()
            # sendall avoids short writes on large payloads
            sock.sendall("%10s%s" % (len(data_s), data_s))
            re = sock.recv(10)
            assert re == "OK"
            sock.close()

        elif self.dest == conf.DATA_TO_DB:
            self.data = data
            col_sep = self.db_col_sep
            row_sep = self.db_row_sep
            master = (self.master_name, self.master_port)
            ParaLiteLog.debug("Load data start:")
            # send request to the master
            t_size = len(data.getvalue())
            sep = conf.SEP_IN_MSG
            tag = conf.LOAD_FROM_API
            if row_sep is None or row_sep == "\n":
                temp_sep = "NULL"
            else:
                temp_sep = row_sep
            msg = sep.join(
                str(s) for s in [conf.REQ, self.cqid, gethostname(), 
                                 self.my_port, self.dest_db, self.dest_table,
                                 t_size, tag, self.fashion, temp_sep, "0"])
            so_master = socket(AF_INET, SOCK_STREAM)
            so_master.connect(master)
            so_master.sendall("%10s%s" % (len(msg), msg))
            so_master.close()
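Every message above is framed with a fixed 10-character length header ("%10s%s" % (len(msg), msg)). Below is a minimal sketch of that framing; frame and read_frame are illustrative names, not ParaLite API, and the sketch assumes the 10-byte header arrives in a single recv (recv_bytes in Example #6 is the robust version of the body loop).

def frame(payload):
    # "%10s" right-justifies the decimal length in exactly 10 characters
    return "%10s%s" % (len(payload), payload)

def read_frame(so):
    n = int(so.recv(10))      # int() tolerates the leading padding spaces
    chunks = []
    while n > 0:
        x = so.recv(n)        # recv may return fewer bytes than requested
        if x == "":
            break             # peer closed the connection early
        chunks.append(x)
        n -= len(x)
    return "".join(chunks)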
Example #2
    def process_ck_info(self, message):
        # message --> DATA_PERSIST:CHECKPOINT[:REPLICA_NODE:REPLICA_PORT]
        is_ck = message[1]
        self.is_checkpoint = is_ck
        if is_ck == conf.CHECKPOINT:
            replica_addr = (message[2], string.atoi(message[3]))
            if self.result_type != conf.MULTI_FILE:
                for dataid in self.result:
                    ds = ""
                    for data in self.result[dataid]:
                        ds += data.getvalue()
                    self.write_data_to_disk(dataid, ds)

                    f_name = self.get_file_name_by_data_id(gethostname(), dataid)
                    cmd = "scp %s %s:%s" % (f_name, replica_addr[0], f_name)
                    ParaLiteLog.debug("CDM: %s" % cmd)
                    os.system(cmd)

            else:
                for dataid in self.result:
                    f = open(self.result[dataid], "rb")
                    while True:
                        ds = f.read(self.max_size)
                        if not ds:
                            break
                        msg = conf.SEP_IN_MSG.join(
                            [conf.DATA_REPLICA, gethostname(), str(dataid), ds])
                        self.send_data_to_node(msg, AF_INET, replica_addr)
Example #3
    def hash_data_file(self, data, key_pos, nodes, row_sep, col_sep, chunk_num, sub_dbs):
        sep = conf.SEP_IN_MSG
        db_buf = {}
        for db in sub_dbs:
            db_buf[db] = cStringIO.StringIO()
        if col_sep is None: SEP = self.db_col_sep
        else: SEP = col_sep
        records = data.split(row_sep)
        count = 1
        for line in records:
            if line == "":
                continue
            key = " ".join(line.strip().split(SEP)[kp] for kp in key_pos)
            pnum = abs(hash(key)) % (len(nodes)*chunk_num)
            if pnum == 0:
                # count the rows hashed to partition 0 (debug statistic only)
                count += 1
            db_name = sub_dbs[pnum]
            db_buf[db_name].write("%s%s" % (line.strip(), row_sep))
            
        ParaLiteLog.debug("count %s" % count)
        
        # log a single buffer size as a sample
        for db in db_buf:
            ParaLiteLog.debug("%s --> %s" % (db, len(db_buf[db].getvalue())))
            break

        return db_buf
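A standalone sketch of the partitioning rule above: a row is routed to sub_dbs[abs(hash(key)) % (len(nodes) * chunk_num)], so each chunk of each node owns one slice of the key space. All names and data below are made up for illustration.

import cStringIO

nodes = ["host1", "host2"]
chunk_num = 2
sub_dbs = ["t_host1_0", "t_host1_1", "t_host2_0", "t_host2_1"]
key_pos = [0]                       # partition on the first column

db_buf = dict((db, cStringIO.StringIO()) for db in sub_dbs)
for line in ["1|a", "2|b", "3|c"]:
    key = " ".join(line.split("|")[kp] for kp in key_pos)
    pnum = abs(hash(key)) % (len(nodes) * chunk_num)
    db_buf[sub_dbs[pnum]].write(line + "\n")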
Example #4
    def start(self):
        try:
            # start socket server to listen all connections
            ch = self.iom.create_server_socket(AF_INET, SOCK_STREAM, 100, ("", self.my_port))
            n, self.my_port = ch.ss.getsockname()
            ParaLiteLog.debug("listen on port : %s ..." % str(self.my_port))

            # start socket server for local connections
            self.local_addr = "/tmp/paralite-local-addr-orderby-%s-%s-%s" % (gethostname(), self.cqid, self.opid)
            if os.path.exists(self.local_addr):
                os.remove(self.local_addr)
            self.iom.create_server_socket(AF_UNIX, SOCK_STREAM, 10, self.local_addr)

            # register local port to the master
            self.register_to_master(self.cqid, self.opid, gethostname(), self.my_port)
            ParaLiteLog.debug("reg to master: FINISH")

            while self.is_running:
                ev = self.next_event(None)
                if isinstance(ev, ioman_base.event_accept):
                    self.handle_accept(ev)
                if isinstance(ev, ioman_base.event_read):
                    if ev.data != "":
                        self.handle_read(ev)

            ParaLiteLog.info("--orderby node %s on %s is finished--" % (self.opid, gethostname()))

        except KeyboardInterrupt, e:
            self.report_error("ParaLite receives a interrupt signal and then will close the process\n")
            ParaLiteLog.info("--orderby node %s on %s is finished--" % (self.opid, gethostname()))
            sys.exit(1)
Example #5
 def get_data_by_part_id(self, result, part_id):
     ParaLiteLog.debug("partition number : %s" % len(self.p_node))
     if self.dest != conf.DATA_TO_ANO_OP or (self.dest == conf.DATA_TO_ANO_OP and len(self.p_node) == 1):
         # part id is the job id
         rs = ""
         for dataid in self.result:
             for data in self.result[dataid]:
                 if isinstance(data, str):
                     # data is stored in file
                     f = open(data, "rb")
                     rs += f.read()
                     f.close()
                 else:
                     # data is stored in buffer
                     rs += data.getvalue()
         return rs
     
     rs = ""
     for part in result[part_id]:
         if isinstance(part, str):
             # data is stored in file
             f = open(part, "rb")
             rs += f.read()
             f.close()
         else:
             # data is stored in buffer
             rs += part.getvalue()
     return rs
Example #6
 def recv_bytes(self, so, n):
     A = []
     while n > 0:
         x = so.recv(n)
         if x == "": break
         A.append(x)
         ParaLiteLog.debug("received %s bytes" % len(x))
         n = n - len(x)
     return string.join(A, "")
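Hypothetical usage inside the same class, assuming so is a connected socket: read the 10-character length header first, then exactly that many payload bytes.

 header = self.recv_bytes(so, 10)
 if len(header) == 10:
     body = self.recv_bytes(so, string.atoi(header.strip()))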
Example #7
 def register_to_master(self, cqid, opid, node, port):
     sep = conf.SEP_IN_MSG
     msg = sep.join([conf.REG, conf.DATA_NODE, cqid, opid, gethostname(), str(self.my_port), self.local_addr])
     ParaLiteLog.debug("MASTER_NODE: %s  MASTER_PORT: %s" % (self.master_name, self.master_port))
     addr = (self.master_name, self.master_port)
     sock = socket(AF_INET, SOCK_STREAM)
     try:
         sock.connect(addr)
     except Exception, e:
         ParaLiteLog.error("Error in register_to_master: %s" % traceback.format_exc())
         # errno 4 is EINTR: the connect was interrupted by a signal, so retry once
         if getattr(e, "errno", None) == 4:
             sock.connect(addr)
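A slightly more general form of the retry above, as a sketch: errno 4 is errno.EINTR, i.e. the connect was interrupted by a signal and can simply be retried. connect_retry is an illustrative name, not ParaLite API, and the sketch assumes Python 2.6+, where socket.error is a subclass of IOError.

import errno
from socket import socket, AF_INET, SOCK_STREAM

def connect_retry(addr, attempts=3):
    sock = socket(AF_INET, SOCK_STREAM)
    for i in range(attempts):
        try:
            sock.connect(addr)
            return sock
        except IOError, e:
            if e.errno != errno.EINTR:
                raise
    raise IOError("connect interrupted %s times: %s" % (attempts, addr))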
Example #8
 def distinct_data(self, data_list):
     try:
         csio = cStringIO.StringIO()
         whole_data = ""
         for data in data_list:
             if data.strip() == "":
                 continue
             whole_data += data.strip() + self.db_row_sep
             del(data)
         if whole_data == "":
             return None, None, None
         whole_data = set(whole_data.strip().split(self.db_row_sep))
         csio.write(self.db_row_sep.join(whole_data))    
         return conf.SINGLE_BUFFER, [csio], len(csio.getvalue())
     except Exception, e:
         ParaLiteLog.debug(traceback.format_exc())
         self.report_error("ERROR in order_by.py : %s" % traceback.format_exc())
         return None, None, None
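Note that set() removes duplicates but does not keep the original row order; that is acceptable for DISTINCT, which imposes no ordering. A tiny demonstration:

rows = "b\na\nb\na".split("\n")
print "\n".join(set(rows))    # prints "a" and "b" once each, arbitrary order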
Example #9
    def sql_proc(self):
        try:
            ParaLiteLog.debug("sql proc : START")
            # start local socket server to listen all connections
            ch = self.iom.create_server_socket(AF_INET,
                                               SOCK_STREAM, 100, ("", self.my_port)) 
            n, self.my_port = ch.ss.getsockname()
            ParaLiteLog.debug("listen on port : %s ..." % str(self.my_port))
            
            # register local port to the master
            self.register_to_master(self.cqid, self.opid, gethostname(), self.my_port)
            ParaLiteLog.debug("reg to master: FINISH")
            
            while self.is_running:
                ev = self.next_event(None)
                if isinstance(ev, ioman_base.event_accept):
                    self.handle_accept(ev)
                if isinstance(ev, ioman_base.event_read):
                    if ev.data != "":
                        self.handle_read(ev)

            for thd in self.threads:
                thd.join()
            for proc in self.processes:
                proc.join()
            ParaLiteLog.info("--sql node %s on %s is finished--" % (self.opid,
                                                                    gethostname()))
            #self.notifier.join()
        except KeyboardInterrupt, e:
            self.report_error("ParaLite receives a interrupt signal and then will close the process\n")
            ParaLiteLog.info("--sql node %s on %s is finished--" % (self.opid,
                                                                    gethostname()))
            sys.exit(1)
Example #10
    def proc_select(self, jobid, exp, target_db):
        assert len(target_db) == 1
        cur_db = target_db[0]
        try:
            conn = sqlite3.connect(cur_db)
            conn.text_factory = str
            
            # register the user-defined aggregate
            conn.create_aggregate("mul", 1, mul)

            c = conn.cursor()
            """
            if self.temp_store != 0:
                c.execute('pragma temp_store=%s' % (self.temp_store))
            if self.cache_size != -1:
                c.execute('pragma cache_size=%s' % (self.cache_size))
            """

            # for test
            c.execute('pragma temp_store=memory')
            c.execute('pragma cache_size=2073741824')

            ParaLiteLog.info("start to execute sql: %s" % exp)
            
            col_sep = self.db_col_sep
            row_sep = self.db_row_sep
            num_of_dest = self.partition_num

            if self.dest == conf.DATA_TO_ANO_OP and num_of_dest > 1:
                columns = self.output
                split_key = self.split_key
                assert split_key is not None
                
                # partition data in hash fashion
                pos = []
                for key in split_key:
                    pos.append(columns.index(key))
                data_part_list = []
                for i in range(self.partition_num):
                    data_part_list.append(cStringIO.StringIO())
                size = 0
                t_size = 0
                for row in c.execute(exp):
                    part_id = abs(hash(self.db_col_sep.join(str(row[p]) for p in pos))) % num_of_dest
                    #part_id = abs(hash(row[pos[0]])) % num_of_dest
                    data = col_sep.join(str(s) for s in row)
                    """
                    size += len(data)
                    if size > self.MAX_SIZE:
                        for partid in data_part_list:
                            fs = self.write_data_to_disk(
                                partid, data_part_list[partid])
                            # delete all data in csio
                            data_part_list[partid].truncate(0)
                        t_size += size
                        size = 0
                        self.result_type = self.MULTI_FILE
                    """
                    data_part_list[part_id].write(data)
                    data_part_list[part_id].write(row_sep)

                for i in range(len(data_part_list)):
                    t_size += len(data_part_list[i].getvalue())
                    
                ParaLiteLog.debug("finish to retrieve the result: %s" % t_size)
                
                if self.result_type == self.MULTI_FILE:
                    # iterate over indices, not buffers, and delete the list
                    # only after the loop completes
                    for partid in range(len(data_part_list)):
                        self.write_data_to_disk(
                            partid, data_part_list[partid].getvalue())
                    del data_part_list
                    return self.MULTI_FILE, None, t_size
                else:
                    ########################
                    # new_list = []
                    # for d in data_part_list:
                    #     new_list.append(d.getvalue())
                    # return self.MULTI_BUFFER, new_list, t_size
                    ###################
                    return self.MULTI_BUFFER, data_part_list, t_size
                
            else:
                csio = cStringIO.StringIO()
                t_size = 0
                size = 0 # record the size of current data
                data_pos = [] # the file name of data if persisted
                for row in c.execute(exp):
                    # NOTE:  For aggregation SQL, e.g. "select max(col) from T ..."
                    # if there is no record in T, (None,) will be returned
                    if row[0] is None:
                        continue
                    data = col_sep.join(str(s) for s in row)
                    size += len(data)
                    if size >= self.MAX_SIZE:
                        # set the instance attribute so the check below sees it
                        self.result_type = conf.MULTI_FILE
                        self.write_data_to_disk(jobid, csio.getvalue())
                        # delete all data in csio
                        csio.truncate(0)
                        t_size += size
                        size = 0
                    csio.write(data)
                    csio.write(row_sep)

                t_size += len(csio.getvalue())
                ParaLiteLog.debug("finish to retrieve the result: %s" % t_size)

                if self.result_type == conf.MULTI_FILE:
                    self.write_data_to_disk(jobid, csio.getvalue())
                    del csio
                    return conf.MULTI_FILE, None, t_size
                else:
                    return self.SINGLE_BUFFER, [csio], t_size

        except sqlite3.OperationalError, e:
            ParaLiteLog.info(traceback.format_exc())
            raise(Exception("%s: QueryExecutionError: %s" % (gethostname(),
                                                             traceback.format_exc())))
Example #11
    def handle_read(self, event):
        message = event.data[10:]

        m = message.split(conf.SEP_IN_MSG)
        try:
            if m[0] == conf.JOB_ARGUMENT:
                self.parse_args(m[1])
                ParaLiteLog.info("parse arguments: FINISH")

            elif m[0] == conf.JOB:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.cur_jobid = m[1]

            elif m[0] == conf.DATA:
                data_id = string.strip(m[1][0:2])
                data = m[1][2:]
                self.source_data.append(data)

                # sort data
                if not self.is_data_ready(self.source_data, self.num_of_children):
                    return

                ParaLiteLog.debug("****SORT DATA****: start")
                s = 0
                for data in self.source_data:
                    s += len(data)
                ParaLiteLog.debug("source data size: %s" % s)
                s_time = time.time()
                rs_type, rs, t_size = self.sort(self.source_data)
                del self.source_data
                ParaLiteLog.debug("****SORT DATA****: finish")

                if rs_type is None:
                    self.send_status_to_master(self.cur_jobid, conf.PENDING)
                    return

                self.total_size += t_size
                self.source_data = []

                # store the result of one job to the final result
                for i in range(len(rs)):
                    if i not in self.result:
                        self.result[i] = [rs[i]]
                    else:
                        self.result[i].append(rs[i])

                if rs_type != conf.MULTI_FILE:
                    # check if the whole data exceeds the LIMITATION
                    if self.total_size > self.MAX_SIZE:
                        self.write_data_to_disk()
                        self.result_type = conf.MULTI_FILE

                e_time = time.time()
                self.total_time += e_time - s_time

                self.send_status_to_master(self.cur_jobid, conf.PENDING)

            elif m[0] == conf.JOB_END:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # all jobs are finished
                self.send_rs_info_to_master(self.total_size, self.total_time)

                # distribute data
                if self.dest == conf.DATA_TO_ONE_CLIENT:
                    ParaLiteLog.debug("dest = %s" % self.dest)
                    self.distribute_data()
                    self.send_status_to_master(self.cur_jobid, conf.ACK)
                    self.is_running = False
                elif self.dest == conf.DATA_TO_DB:
                    self.distribute_data()

            elif m[0] == conf.DATA_PERSIST:
                # whether the data is required to be persisted or not
                if m[1] == conf.CHECKPOINT:
                    self.write_data_to_disk()

            elif m[0] == conf.DLOAD_REPLY:
                sep = conf.SEP_IN_MSG
                reply = sep.join(m[1:])
                ParaLiteLog.info("receive the information from the master")
                ParaLiteLog.debug(reply)

                if len(self.data.getvalue()) != 0:
                    dload_client.dload_client().load_internal_buffer(
                        reply,
                        self.dest_table,
                        self.data,
                        self.fashion,
                        self.hash_key,
                        self.hash_key_pos,
                        self.db_col_sep,
                        self.db_row_sep,
                        self.db_col_sep,
                        False,
                        "0",
                        self.log_dir,
                    )

                # send END_TAG to the master
                client_id = "0"
                msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id])
                so_master = socket(AF_INET, SOCK_STREAM)
                so_master.connect((self.master_name, self.master_port))
                so_master.send("%10s%s" % (len(msg), msg))
                so_master.close()
                ParaLiteLog.debug("sending to master: %s" % (conf.END_TAG))
                ParaLiteLog.debug("----- dload client finish -------")

            elif message == conf.DLOAD_END_TAG:
                ParaLiteLog.debug("---------import finish---------")
                self.send_status_to_master(" ".join(self.cur_jobid), conf.ACK)
                self.is_running = False

            elif m[0] == conf.EXIT:
                self.is_running = False

            elif m[0] == conf.NODE_FAIL:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # message --> NODE_FAIL:FAILED_NODE:REPLICA_NODE
                failed_node, replica_node = m[1:3]
                self.failed_node.append(failed_node)
                if replica_node != "" and replica_node == gethostname():
                    # load replica data for the failed node
                    self.recovery_data(self.replica_result, replica_node)
                ParaLiteLog.debug("Finish to handle node failure message")

        except Exception, e:
            es(traceback.format_exc())
            ParaLiteLog.info(traceback.format_exc())
            self.is_running = False
            self.no_error = False
Example #12
    def sort(self, data_list):
        try:
            ParaLiteLog.debug("sort data START")
            csio = cStringIO.StringIO()
            whole_data = ""
            for data in data_list:
                if data.strip() == "":
                    continue
                whole_data += data.strip() + self.db_row_sep
                del (data)
            if whole_data == "":
                return None, None, None

            whole_data = whole_data.strip().split(self.db_row_sep)
            ParaLiteLog.debug("order key %s" % (str(self.order_key)))
            ParaLiteLog.debug("order type %s" % (str(self.order_type)))

            # get the column positions to be sorted in the input
            key_pos = []  # (pos:string_or_int:reverse_or_not)
            for key in self.order_key:
                pos = self.input.index(key)
                if key not in self.attrs:
                    key_type = conf.STRING
                else:
                    key_type = self.attrs[key]
                if self.order_type[self.order_key.index(key)] == conf.DESC:
                    key_pos.append((pos, key_type, True))
                else:
                    key_pos.append((pos, key_type, False))

            # check if the ordering type is the same or not
            flag = 0
            t0 = self.order_type[0]
            for t in self.order_type:
                if t != t0:
                    flag = 1
                    break

            if flag == 1:
                # mixed ASC/DESC: sort once per key, from the least-significant
                # key to the most significant; correct because list.sort is stable
                i = len(key_pos) - 1
                while i >= 0:
                    pos = key_pos[i]
                    if pos[1] == conf.INT:
                        whole_data.sort(key=lambda l: string.atoi(l.split(self.db_col_sep)[pos[0]]), reverse=pos[2])
                    elif pos[1] == conf.FLOAT or pos[1] == conf.REAL:
                        whole_data.sort(key=lambda l: float(l.split(self.db_col_sep)[pos[0]]), reverse=pos[2])
                    else:
                        whole_data.sort(key=lambda l: l.split(self.db_col_sep)[pos[0]], reverse=pos[2])
                    i -= 1
            else:
                sort_key = []
                for pos in key_pos:
                    if pos[1] == conf.INT:
                        sort_key.append("string.atoi(l.split('%s')[%s])" % (self.db_col_sep, pos[0]))
                    elif pos[1] == conf.FLOAT or pos[1] == conf.REAL:
                        sort_key.append("float(l.split('%s')[%s])" % (self.db_col_sep, pos[0]))
                    else:
                        sort_key.append("l.split('%s')[%s]" % (self.db_col_sep, pos[0]))

                # build the key function once instead of calling eval per comparison
                key_func = eval("lambda l: (%s)" % ",".join(sort_key))
                whole_data.sort(key=key_func, reverse=key_pos[0][2])

            csio.write(self.db_row_sep.join(whole_data))
            # if len(self.db_col_sep) == 1 and self.db_row_sep == "\n":
            #     ParaLiteLog.debug("sort data: shell")
            #     self.shell_sort(data_list, csio)
            # else:
            #     ParaLiteLog.debug("sort data: quick sort")
            #     self.quick_sort(data_list, csio)
            return conf.SINGLE_BUFFER, [csio], len(csio.getvalue())
        except Exception, e:
            ParaLiteLog.debug(traceback.format_exc())
            self.report_error("ERROR in order_by.py : %s" % traceback.format_exc())
            return None, None, None
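The mixed ASC/DESC branch above works only because Python's list.sort is stable: sorting once per key, from the least significant to the most significant, preserves the earlier ordering among ties. A small demonstration with made-up rows:

rows = ["1|b", "2|a", "1|a", "2|b"]
rows.sort(key=lambda l: l.split("|")[1], reverse=True)  # secondary key DESC
rows.sort(key=lambda l: l.split("|")[0])                # primary key ASC
print rows   # ['1|b', '1|a', '2|b', '2|a']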
Example #13
            while self.is_running:
                ev = self.next_event(None)
                if isinstance(ev, ioman_base.event_accept):
                    self.handle_accept(ev)
                if isinstance(ev, ioman_base.event_read):
                    if ev.data != "":
                        self.handle_read(ev)

            ParaLiteLog.info("--orderby node %s on %s is finished--" % (self.opid, gethostname()))

        except KeyboardInterrupt, e:
            self.report_error("ParaLite receives a interrupt signal and then will close the process\n")
            ParaLiteLog.info("--orderby node %s on %s is finished--" % (self.opid, gethostname()))
            sys.exit(1)
        except Exception, e1:
            ParaLiteLog.debug(traceback.format_exc())
            self.report_error(traceback.format_exc())
            sys.exit(1)

    def handle_read(self, event):
        message = event.data[10:]

        m = message.split(conf.SEP_IN_MSG)
        try:
            if m[0] == conf.JOB_ARGUMENT:
                self.parse_args(m[1])
                ParaLiteLog.info("parse arguments: FINISH")

            elif m[0] == conf.JOB:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.cur_jobid = m[1]
Example #14
    def parse_func_2(self):
        """
        parse each expression to get useful information: input is different
        input -->      ***[count(*), sum(a), sum(a+b*c)]***
        output -->     ***[count(*), sum(a) + 1, avg(a+b*c)]***
        expression --> ['count(*)', 'sum(a)', 'avg(a+b*c)']
        ==> [ 
             ['count', 1,       0, '*',       1,  0],
             ['sum',   2,       0, 'a',       2,  1, [1], lambda _a:_a+1 ],
             ['sum',   3,       0, 'a+b*c',   3,  0]
            ]
        """
        # if there is avg function, convert it to sum and count
        avg_pos = []
        for fun in self.function:
            if fun.find("avg(") != -1:
                avg_pos.append(self.function.index(fun))
        if avg_pos != []:
            if "count(*)" not in self.function:
                self.function.append("count(*)")
        # sometimes a result column contains more than one function, so a function's
        # position in self.function differs from its position in self.expr; this
        # counter tracks the position in self.expr, and pos_map records the mapping
        # between the two kinds of positions.
        fun_counter = 0 
        pos_map = {}
        for expr_num in range(len(self.function)):
            expr = self.function[expr_num]
            pos_map[expr_num] = fun_counter

            # parse "sum(a) + 1" --> ['sum(a)', '+', '1']
            if expr.find("count(*)") != -1:
                expr = expr.replace("count(*)", "count(a_a)")
            _expr = newparser.parse_column_expr(expr)
            
            #  to describe the pos of elements in the outer arithmetic operation
            outer_ao_pos = [] 
            new_expr = expr
            new_args = []
            ParaLiteLog.debug(self.input)
            for ele in _expr:
                if re.match("(.*)\((.*)\)", ele):
                    parsed_expr = []                                
                    new_expr = new_expr.replace(ele, "_col%s" % str(fun_counter))
                    new_args.append("_col%s" % str(fun_counter))
                    func_name = ele[0:ele.find("(")]
                    if func_name not in conf.GENERAL_FUNC:
                        return False, "ParaLite cannot support aggregation function %s" % func_name
                    if func_name == "avg":
                        ele = ele.replace("avg", "sum")
                    fun_counter += 1
                    func_attr = ele[ele.find("(") + 1 : ele.rfind(")")]
                    parsed_expr.append(func_name)
                    if func_attr == "a_a": 
                        func_attr = "*"
                        ele = ele.replace("a_a", "*")
                        expr = expr.replace("count(a_a)", "count(*)")
                        opexpr = [func_attr]
                    pos_in_input = self.input.index(ele)
                    parsed_expr.append(pos_in_input)
                    parsed_expr.append(0)
                    parsed_expr.append(ele)
                    if expr in self.output: parsed_expr.append(self.output.index(expr))
                    else: parsed_expr.append(-1)
                    self.expr.append(parsed_expr)
                    outer_ao_pos.append(fun_counter - 1)
                else:
                    # other operator element: + - * / ^
                    continue
            cur_pos = pos_map[expr_num]
            if cur_pos >= len(self.expr):
                # e.g. "select sum(a), count(*), avg(a)": nothing more needs to
                # be done for avg(a) because sum(a) and count(*) already exist
                continue
            if len(_expr) == 1:
                self.expr[cur_pos].append(0)
            else:
                self.expr[cur_pos].append(1)
                self.expr[cur_pos].append(outer_ao_pos)

                tempexpr = new_expr
                tempargs = ",".join(new_args)
                for eacharg in new_args:
                    newarg = eacharg.replace(".", "_")
                    tempexpr = tempexpr.replace(eacharg, newarg)
                    tempargs = tempargs.replace(eacharg, newarg)
                    
                self.expr[cur_pos].append(
                    eval("lambda %s:%s" % (tempargs, tempexpr)))
        self.pos_map = pos_map
        return True, None
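The lambda built at the end of parse_func_2 applies the outer arithmetic after aggregation. For instance, "sum(a) + 1" is rewritten to "_col0 + 1" with argument list "_col0" and compiled once with eval; the values below are illustrative:

new_args = "_col0"
new_expr = "_col0 + 1"
post_func = eval("lambda %s:%s" % (new_args, new_expr))
print post_func(41)    # 42, applied to the aggregated sum afterwards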
Example #15
            elif m[0] == conf.DATA_PERSIST:
                # whether the data is required to be persisted or not
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.process_ck_info(m)

            elif m[0] == conf.DATA_REPLICA:
                # message --> DATA_REPLICA:DATANODE:DATAID:DATA
                datanode, dataid = m[1:3]
                f_name = self.get_file_name_by_data_id(gethostname(), dataid)
                fr = open(f_name, "wa")
                fr.write(m[3])
                fr.close()

            elif m[0] == conf.DATA_DISTRIBUTE:
                ParaLiteLog.debug("MESSAGE: %s" % message)                
                # send a part of data to the next operator
                # DATA_DISTRIBUTE:partition_num:destnode
                part_id, destnode = m[1:]
                data = self.get_data_by_part_id(self.result, string.atoi(part_id))
                
                # DATA message includes: type:id+data
                # the first 2 chars represents the opid
                msg = sep.join([conf.DATA, "%2s%s" % (self.opid, data)])
                if destnode == gethostname():
                    # use local socket
                    addr = self.p_node[destnode][1]
                    t = AF_UNIX
                else:
                    addr = (destnode, self.p_node[destnode][0])
                    t = AF_INET
Example #16
    def handle_read(self, event):
        message = event.data[10:]

        sep = conf.SEP_IN_MSG
        m = message.split(sep)
        try:
            if m[0] == conf.DATA_END:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # all data has been dispatched to the parent nodes
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                ParaLiteLog.debug("notify ACK to master")                    
                self.is_running = False
                
            elif message == conf.END_TAG:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                self.is_running = False

            elif message == conf.DLOAD_END_TAG:
                ParaLiteLog.debug("---------import finish---------")
                self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                self.is_running = False

            elif message == conf.EXIT:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.is_running = False
            
            elif m[0] == conf.JOB_ARGUMENT:
                self.parse_args(m[1])
                ParaLiteLog.info("parse arguments: FINISH")
                # init the persisted result data
                if self.is_checkpoint is not None and self.is_checkpoint == conf.CHECKPOINT:
                    ParaLiteLog.debug("recovery data: START")
                    # this is a recovery operator
                    self.recovery_data(self.result, gethostname())
                    ParaLiteLog.debug("recovery data: FINISH")
                    self.send_rs_info_to_master(0, 0)
                else:
                    # delete all temporary files for this operator
                    os.system("rm -f %s/%s_%s" % (self.temp_dir, "sql", self.opid))

                ###############################
                # scanner = threading.Thread(target=self.scan_process_queue, args=(self.process_queue, ))
                # scanner.setDaemon(True)
                # scanner.start()
                # self.threads.append(scanner)
                ##########################
            elif m[0] == conf.JOB:
                self.ex_s_time = time.time()
                self.ex_w_time = 0
                ParaLiteLog.debug("MESSAGE: %s" % message)
                s_time = time.time()
                jobid = m[1]
                target_db = m[2].split()
                exp = self.expression
                ParaLiteLog.debug("*****JOB %s******:start" % jobid)
                
                # FAULT TOLERANCE:
                if jobid in self.job_data:
                    # this is a failed job, we should first delete the old result value
                    if self.dest == conf.DATA_TO_ANO_OP and self.partition_num > 1:
                        for partid in self.result:
                            pos = self.job_list.index(jobid)
                            self.result[partid][pos] = ""
                    else:
                        self.result[jobid] = ""
                
                if exp.lower().startswith("select"):
                    """
                    selection task: (1), execute sql (2), notify the result to
                    the master (3), wait for the DATA_PERSIST message from the
                    master (4), persist data if so (5), notify ACK to the master
                    """
                    ParaLiteLog.info("proc_select: START")
                    st_time = time.time()
                    
                    rs_type, rs, t_size = self.proc_select(jobid, exp, target_db)

                    et_time = time.time()
                    ParaLiteLog.debug("Job %s cost time %s second" % (jobid, (et_time - st_time)))
                    # FAULT TOLERANCE:
                    if jobid in self.job_data:
                        # this is a failed job
                        if self.dest == conf.DATA_TO_ANO_OP and self.partition_num > 1:
                            for partid in self.result:
                                pos = self.job_list.index(jobid)
                                self.result[partid][pos] = rs[partid]
                        else:
                            self.result[jobid] = rs
                        self.send_status_to_master(jobid, conf.PENDING)
                        return
                        
                    self.job_data[jobid] = t_size
                    self.job_list.append(jobid)
                    self.total_size += t_size
                    
                    # store the result of one job to the final result
                    if len(rs) == 1:
                        if self.dest == conf.DATA_TO_ANO_OP:
                            # dest is AGGR op or ORDER op, use 0 as the key
                            if 0 not in self.result:
                                self.result[0] = rs
                            else:
                                self.result[0].append(rs[0])

                            if self.is_checkpoint == 1:
                                self.write_data_to_disk(0, rs[0].getvalue())
                        else:
                            # dest is UDX op, use jobid as the key
                            self.result[string.atoi(jobid)] = rs
                            if self.is_checkpoint == 1:
                                self.write_data_to_disk(0, rs[0].getvalue())
                        
                    else:
                        # use partid as the key
                        for i in range(len(rs)):
                            if i not in self.result:
                                self.result[i] = [rs[i]]
                            else:
                                self.result[i].append(rs[i])
                        if self.is_checkpoint == 1:
                            for i in range(len(rs)):
                                self.write_data_to_disk(i, rs[i].getvalue())
                        
                    # check if the whole data exceeds the LIMITATION
                    if rs_type != self.MULTI_FILE:
                        if (self.is_checkpoint is not None and self.is_checkpoint == conf.CHECKPOINT) or self.total_size > self.MAX_SIZE:
                            for dataid in self.result:
                                data = ""
                                for d in self.result[dataid]:
                                    data += d.getvalue()
                                self.write_data_to_disk(dataid, data)
                            self.result_type = self.MULTI_FILE
                            
                    e_time = time.time()
                    if self.total_time == 0:
                        self.total_time = (e_time - s_time)
                    self.send_status_to_master(jobid, conf.PENDING)

                elif exp.lower().startswith("create"):
                    ParaLiteLog.info("proc_create: START")
                    ParaLiteLog.info("SQL: %s" % exp)                    
                    self.proc_create(exp, target_db)
                    ParaLiteLog.info("proc_create: START")
                    self.send_status_to_master(jobid, conf.ACK)
                    self.is_running = False
                elif exp.lower().startswith("drop"):
                    ParaLiteLog.info("proc_drop: START")            
                    self.proc_drop(exp, target_db)
                    self.send_status_to_master(jobid, conf.ACK)
                    self.is_running = False
                ParaLiteLog.debug("*****JOB %s******:finish" % jobid)
                self.ex_w_time += (time.time() - self.ex_s_time)
                self.ex_s_time = 0

            elif m[0] == conf.JOB_END:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # all jobs are finished
                # create a dictionary to store the status of each part of data
                data_status = {}  # {data_id:[(pos_in_result, status)]}
                for dataid in self.result:
                    if dataid not in data_status:
                        data_status[dataid] = []
                    for i in range(len(self.result[dataid])):
                        data_status[dataid].append((i, 1))
                self.data_status = data_status
                self.reader = self.get_next_reader()
                    
                self.send_rs_info_to_master(self.total_size, self.total_time)

                # distribute data
                if self.dest == conf.DATA_TO_ONE_CLIENT:
                    self.distribute_data()
                    self.send_status_to_master(" ".join(self.job_data), conf.ACK)
                    self.is_running = False
                elif self.dest == conf.DATA_TO_DB:
                    self.distribute_data()
 
            elif m[0] == conf.DLOAD_REPLY:
                reply = sep.join(m[1:])
                ParaLiteLog.info("receive the information from the master")
                ParaLiteLog.debug(reply)
                
                if len(self.data.getvalue()) != 0:
                    dload_client.dload_client().load_internal_buffer(
                        reply, self.dest_table, self.data, self.fashion, 
                        self.hash_key, self.hash_key_pos, self.db_col_sep, 
                        self.db_row_sep, self.db_col_sep, False, "0", self.log_dir)

                # send END_TAG to the master
                client_id = "0"
                msg = sep.join([conf.REQ, conf.END_TAG, gethostname(), client_id])
                so_master = socket(AF_INET, SOCK_STREAM)
                so_master.connect((self.master_name, self.master_port))
                so_master.send("%10s%s" % (len(msg), msg))
                so_master.close()
                ParaLiteLog.debug("sending to master: %s" % (conf.END_TAG))
                ParaLiteLog.debug("----- dload client finish -------")

            elif m[0] == conf.DATA_PERSIST:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # whether the data is required to be persisted or not
                self.process_ck_info(m)
                
            elif m[0] == conf.DATA_DISTRIBUTE:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # send a part of data to the next operator
                # DATA_DISTRIBUTE:partition_num:destnode
                part_id, destnode = m[1:]
                data = self.get_data_by_part_id(self.result, string.atoi(part_id))
                
                # DATA message includes: type:id+data
                # the first 2 chars represents the opid
                msg = sep.join([conf.DATA, "%2s%s" % (self.opid, data)])
                if destnode == gethostname():
                    # use local socket
                    addr = self.p_node[destnode][1]
                    t = AF_UNIX
                else:
                    addr = (destnode, self.p_node[destnode][0])
                    t = AF_INET
                self.send_data_to_node(msg, t, addr)
                ParaLiteLog.debug("send data susscufully   %s %s --> %s" % (self.opid, gethostname(), destnode))

            elif m[0] == conf.DATA_DISTRIBUTE_UDX:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # send data to udx client
                # m[1:] = worker.id:jobid:(node:port | addr):size
                
                if len(m) == 6:
                    w_id, jobid = m[1:3]
                    addr = (m[3], string.atoi(m[4]))
                    t = AF_INET
                    bk = string.atoi(m[5])
                elif len(m) == 5:
                    w_id, jobid = m[1:3]
                    addr = m[3]
                    t = AF_UNIX
                    bk = string.atoi(m[4])
                data = self.get_data_by_blocksize(jobid, bk)
                if not data:
                    # if we send nothing here, the udx will never send another
                    # KAL and thus never receive data, so the whole process
                    # would block forever
                    msg = sep.join([conf.DATA, "EMPTY"])
                else:
                    msg = sep.join([conf.DATA, data])
                self.send_data_to_node(msg, t, addr)
                
            elif m[0] == conf.DATA_REPLICA:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # message --> DATA_REPLICA:DATANODE:DATAID:DATA
                datanode, dataid = m[1:3]
                f_name = self.get_file_name_by_data_id(gethostname(), dataid)
                fr = open(f_name, "wa")
                fr.write(m[4])
                fr.close()

            elif m[0] == conf.NODE_FAIL:
                ParaLiteLog.debug("MESSAGE: %s" % message)
                # message --> NODE_FAIL:FAILED_NODE:REPLICA_NODE
                failed_node, replica_node = m[1:3]
                self.failed_node.append(failed_node)
                if replica_node == gethostname():
                    # load replica data for the failed node
                    self.recovery_data(self.replica_result, replica_node)
        except Exception, e:
            es("in sql_proc : %s" % traceback.format_exc())
            ParaLiteLog.info(traceback.format_exc())
            self.is_running = False
            self.no_error = False
Example #17
 def process_job(self, jobid, exp, target_db, process_queue):
     s_time = time.time()
     rs_type, rs, t_size = self.proc_select(jobid, exp, target_db)
     e_time = time.time()
     process_queue.put((jobid, rs_type, rs, t_size, e_time-s_time))
     ParaLiteLog.debug("Job %s cost %s " % (jobid, e_time-s_time))
Example #18
    def handle_read(self, event):
        message = event.data[10:]

        sep = conf.SEP_IN_MSG
        m = message.split(sep)
        try:        
            if m[0] == conf.JOB_ARGUMENT:
                self.parse_args(m[1])

                ParaLiteLog.info("parsed structure : \n%s" % str(self.expr))
                ParaLiteLog.info("parse arguments: FINISH")

                if self.is_checkpoint is not None and self.is_checkpoint == conf.CHECKPOINT:
                    # this is a recovery operator
                    # init the persisted result data
                    ParaLiteLog.debug("recovery data: START")
                    self.recovery_data(self.result, gethostname())
                    ParaLiteLog.debug("recovery data: FINISH")
                    self.send_rs_info_to_master(0, 0)

                else:
                    self.parse_func()
                    # delete all temporary files for this operator
                    os.system("rm -f %s/%s_%s" % (self.temp_dir, "groupby", self.opid))


            elif m[0] == conf.JOB:
                ParaLiteLog.debug("MESSAGE: %s" % message)                
                self.cur_jobid = m[1]
                self.job_list.append(m[1])
                
            elif m[0] == conf.DATA:
                data_id = string.strip(m[1][0:2])
                data = m[1][2:]
                self.source_data.append(data)

                # aggregate data
                if not self.is_data_ready(self.source_data, self.num_of_children):
                    return

                ParaLiteLog.debug("****GROUP DATA****: start")
                s = 0
                for data in self.source_data:
                    s += len(data)
                ParaLiteLog.debug("source data size : %s" % s)
                s_time = time.time()
                rs_type, rs, t_size = self.hash_based_aggregate(self.source_data)
                ParaLiteLog.debug("****GROUP DATA****: finish")
                
                del self.source_data
                self.total_size += t_size
                self.source_data = []
                
                # store the result of one job to the final result
                if len(rs) == 1:
                    if self.dest == conf.DATA_TO_ANO_OP or self.dest == conf.DATA_TO_DB:
                        # dest is an AGGR op, ORDER op or the database, use 0 as the key
                        if 0 not in self.result:
                            self.result[0] = rs
                        else:
                            self.result[0].append(rs[0])
                    else:
                        # dest is UDX op, use jobid as the key
                        self.result[string.atoi(self.cur_jobid)] = rs
                        if self.is_checkpoint == 1:
                            self.write_data_to_disk(self.cur_jobid, rs[0].getvalue())

                else:
                    # use partid as the key
                    for i in range(len(rs)):
                        if i not in self.result:
                            self.result[i] = [rs[i]]
                        else:
                            self.result[i].append(rs[i])

                # check if the whole data exceeds the LIMITATION
                if rs_type != conf.MULTI_FILE:
                    if self.total_size > self.MAX_SIZE:
                        for dataid in self.result:
                            data = ""
                            for d in self.result[dataid]:
                                data += d.getvalue()
                            self.write_data_to_disk(dataid, data)
                        self.result_type = conf.MULTI_FILE
                
                e_time = time.time()
                self.total_time += (e_time - s_time)
                self.send_status_to_master(self.cur_jobid, conf.PENDING)
                    
            elif m[0] == conf.JOB_END:
                ParaLiteLog.debug("MESSAGE: %s" % message)                
                # all jobs are finished
                self.send_rs_info_to_master(self.total_size, self.total_time)
                
                # distribute data
                if self.dest == conf.DATA_TO_ONE_CLIENT:
                    self.distribute_data()
                    self.send_status_to_master(" ".join(self.job_list), conf.ACK)
                    self.is_running = False
                elif self.dest == conf.DATA_TO_DB:
                    self.distribute_data()

            elif m[0] == conf.DATA_PERSIST:
                # whether the data is required to be persisted or not
                ParaLiteLog.debug("MESSAGE: %s" % message)
                self.process_ck_info(m)
Example #19
    def load_internal_file(self, reply, opt, db_col_sep, LOG_DIR):
        ParaLiteLog.info("load_internal_file: START")
        table = opt.table
        files = opt.files
        col_sep = opt.col_sep
        row_sep = opt.row_sep
        fashion = opt.fashion
        key = opt.key
        key_pos = opt.key_pos
        is_replace = opt.replace

        self.db_col_sep = db_col_sep        
        for f in files:
            self.files[f] = 1
        self.file_reader = open(self.get_next_file(), "rb")

        try:
            """
            received message = nodes # sub_dbs # chunk_num # replica_info 
            
            nodes should be (| is SEP_IN_MSG):
            n1 : p1|l1 , n2 : p2|l2 , ...               IF fashion = HASH_FASHION 
            n1 : p1|l1|s1|num , n2 : p2|l2|s2|num , ... IF fashion = ROUND_ROBIN
            TBD                                     IF fashion = RANGE_FASHION

            node_db_info: node1:[db_1_1] , node2:[db_1_2] , node3:[db_2_1], ...
            replica_info: db_1_1 db_1_1_r_1 node1 , db_1_2 db_1_2_r_1 node2 , ...
            """
            mm = reply.split("#")

            nodes = mm[0].split(",")
            sub_dbs = mm[1].split(",")
            chunk_num = string.atoi(mm[2])
            replica = mm[3]
            
            replica_info = {} # {db_name : {replica_db_name:node}}
            if replica != "":
                for whole_re in replica.split(","):
                    lll = whole_re.split(" ")
                    if lll[0] not in replica_info:
                        replica_info[lll[0]] = {}
                    replica_info[lll[0]][lll[1]] = lll[2]

            node_addr = {} # {node:addr}
            for node in nodes:
                m = node.split(conf.SEP_IN_MSG)
                if m[0] == gethostname(): addr = m[2]
                else: addr = (m[0], string.atoi(m[1]))
                node_addr[m[0]] = addr
            
            thds = []
            if nodes == []:
                ParaLiteLog.info("there is no data to load")
            elif fashion == conf.HASH_FASHION:
                ParaLiteLog.info(fashion)
                if row_sep is not None and row_sep != "\n":
                    left_ds = ""  # partial record carried over between blocks
                    while True:
                        dst = self.get_data_as_bk(DATA_MAX_SIZE)
                        if dst is None:
                            ParaLiteLog.info("really get data as bk: 0")
                            break
                        ParaLiteLog.info("really get data as bk: %s" % (len(dst)))
                        # cut at the last complete record; the tail goes to the next block
                        pos = dst.rfind(row_sep)

                        ds = left_ds + dst[0:pos]
                        left_ds = dst[pos+len(row_sep):]
                        del dst
                        db_buf = self.hash_data_file(ds, key_pos, nodes,
                                                       row_sep, col_sep,
                                                       chunk_num, sub_dbs)
                        ParaLiteLog.debug("hash data finish %s" % len(ds))
                        del ds                        
                        for db in db_buf:
                            data = db_buf[db].getvalue()
                            node = db.split("_")[-3]
                            thd = threading.Thread(target=self.send_to_node,
                                                   args=(db, table, data, node_addr[node],
                                                         row_sep,col_sep,
                                                         is_replace))
                            thd.setDaemon(True)
                            thd.start()
                            thds.append(thd)
                            if db in replica_info:
                                for rdb in replica_info[db]:
                                    node = replica_info[db][rdb]
                                    self.send_to_node(rdb, table, data,
                                                      node_addr[node],
                                                      row_sep, col_sep, is_replace)

                else:
                    while True:
                        ds = self.get_data_as_bk(DATA_MAX_SIZE)
                        if ds is None:
                            ParaLiteLog.info("really get data as bk: 0")
                            break
                        ParaLiteLog.info("really get data as bk: %s" % (len(ds)))
                        db_buf = self.hash_data_file(ds, key_pos, nodes,
                                                       "\n", col_sep, chunk_num, sub_dbs)
                        for db in db_buf:
                            ParaLiteLog.debug(
                                "%s -- > %s" % (db, len(db_buf[db].getvalue())))
                            break
                        for db in db_buf:
                            data = db_buf[db].getvalue()
                            node = db.split("_")[-3]

                            thd = threading.Thread(target=self.send_to_node,
                                                   args=(db, table, data,
                                                         node_addr[node],
                                                         row_sep,col_sep,
                                                         is_replace))
                            thd.setDaemon(True)
                            thd.start()
                            thds.append(thd)
                            if db in replica_info:
                                for rdb in replica_info[db]:
                                    node = replica_info[db][rdb]
                                    self.send_to_node(rdb, table, data,
                                                      node_addr[node],
                                                      row_sep, col_sep, is_replace)
                        del db_buf        
                        del ds
                        
            elif fashion == conf.REPLICATE_FASHION:
                # total_size was undefined in this scope; derive it from the input files
                total_size = sum(os.path.getsize(f) for f in files)
                self.replicate_data(table, files, total_size, nodes)
            elif fashion == conf.RANGE_FASHION:
                self.range_data()
            else:
                num_of_db = len(nodes) * chunk_num                
                if row_sep is not None and row_sep != "\n":
                    i = 0
                    left_ds = ""
                    while True:
                        db = sub_dbs[i % num_of_db]
                        # m[3] (the per-node data size) is used below, so this
                        # parsing must stay even though node comes from the db name
                        m = nodes[(i % num_of_db) / chunk_num].split(conf.SEP_IN_MSG)
                        node = db.split("_")[-3]

                        size = string.atoi(m[3]) / chunk_num + 1
                        if size > DATA_MAX_SIZE:
                            ParaLiteLog.info("start to get data as bk: %s" % (DATA_MAX_SIZE))  
                            ds = self.get_data_as_bk(DATA_MAX_SIZE)
                        else:
                            ParaLiteLog.info("start to get data as bk: %s" % (size))
                            ds = self.get_data_as_bk(size)
                        if ds is None:
                            ParaLiteLog.info("really get data as bk: 0")
                            break
                        ParaLiteLog.info("really get data as bk: %s" % (len(ds)))
                        pos = ds.rfind(row_sep)
                        send_ds = left_ds + ds[0:pos]
                        left_ds = ds[pos+len(row_sep):]
                        thd = threading.Thread(
                            target=self.send_to_node,
                            args=(db, table, send_ds, node_addr[node],
                                  row_sep, col_sep, is_replace))
                        thd.setDaemon(True)
                        thd.start()
                        thds.append(thd)
                        if db in replica_info:
                            for rdb in replica_info[db]:
                                node = replica_info[db][rdb]
                                thd = threading.Thread(
                                    target=self.send_to_node,
                                    args=(rdb, table, ds, node_addr[node],
                                          row_sep, col_sep, is_replace))
                                thd.setDaemon(True)
                                thd.start()
                                thds.append(thd)
                        i += 1
                else:
                    i = 0
                    while True:
                        db = sub_dbs[i % num_of_db]
                        # m[3] (the per-node data size) is used below, so this
                        # parsing must stay even though node comes from the db name
                        m = nodes[(i % num_of_db) / chunk_num].split(conf.SEP_IN_MSG)
                        node = db.split("_")[-3]
                        size = string.atoi(m[3]) / chunk_num + 1
                        if size > DATA_MAX_SIZE:
                            ParaLiteLog.info(
                                "start to get data as bk: %s" % (DATA_MAX_SIZE))  
                            ds = self.get_data_as_bk(DATA_MAX_SIZE)
                        else:
                            ParaLiteLog.info("start to get data as bk: %s" % (size))
                            ds = self.get_data_as_bk(size)
                        if ds is None:
                            ParaLiteLog.info("really get data as bk: 0")
                            break
                        ParaLiteLog.info("really get data as bk: %s" % (len(ds)))
                        thd = threading.Thread(
                            target=self.send_to_node,
                            args=(db, table, ds, node_addr[node],
                                  row_sep,col_sep, is_replace))
                        thd.setDaemon(True)
                        thd.start()
                        thds.append(thd)
                        if db in replica_info:
                            for rdb in replica_info[db]:
                                ParaLiteLog.info(rdb)
                                node = replica_info[db][rdb]
                                thd = threading.Thread(
                                    target=self.send_to_node,
                                    args=(rdb, table, ds, node_addr[node],
                                          row_sep, col_sep, is_replace))
                                thd.setDaemon(True)
                                thd.start()
                                thds.append(thd)
                        i += 1
                        del ds
            for thd in thds:
                thd.join()
        except Exception, e:
            ParaLiteLog.debug(traceback.format_exc())
            raise(Exception(traceback.format_exc()))
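The rfind(row_sep)/left_ds pattern above is a general technique for streaming fixed-size blocks without splitting a record: cut each block at its last complete record and carry the tail into the next block. A compact sketch with hypothetical read_block and handle callbacks:

def stream_records(read_block, row_sep, handle):
    left = ""
    while True:
        block = read_block()              # returns None at end of input
        if block is None:
            if left:
                handle(left)              # flush the final partial record
            break
        pos = block.rfind(row_sep)
        if pos == -1:                     # no complete record in this block
            left += block
            continue
        handle(left + block[:pos])
        left = block[pos + len(row_sep):]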
Example #20
         es("in write_to_db: %s" % (traceback.format_exc()))
         ParaLiteLog.info(traceback.format_exc())
         sys.exit(1) 
     ParaLiteLog.info("record_num is %s" % (record_num))
     con.commit()
     cr.close()
     con.close()
 else:
     data = string.strip(data)
     record_num = len(data.split("\n"))
     ParaLiteLog.info("record number is : %s" % (record_num))
     temp_file = "%s%s%s_%s.dat" % (self.temp_dir, os.sep, random.randint(1, 1000), self.port)
     f = open(temp_file, "wb")
     f.write(data)
     f.close()
     ParaLiteLog.debug("DB_COL_SEP: %s" % self.db_col_sep)
     if self.db_col_sep != "|":
         ParaLiteLog.info("LOAD: execute .separator and .import")
         if self.cmd_col_sep is None or self.cmd_col_sep == "None":
             sep_temp = self.db_col_sep
         else:
             sep_temp = self.cmd_col_sep
         sqlf = "%s%s%s-import" % (self.log_dir, os.sep, random.randint(1,1000))
         sqlff = open(sqlf, "wb")
         sqlff.write(".separator %s\n" % (sep_temp))
         sqlff.write(".import %s %s" % (temp_file, self.table))
         sqlff.close()
         cmd = "sqlite3 %s < %s" % (db, sqlf)
         os.system(cmd)
         os.system("rm -f %s" % (sqlf))
     else: