Esempio n. 1
0
 def start(self):
     """Run this node once: receive the input, process it, pass on the result.

     Steps, in order: parse the arguments, open the per-host log file,
     receive the input over a ``cqueue`` created from ``self.node``
     (``self.node[0]`` is the host, ``self.node[1]`` the port), process it
     with ``self.sort_base_proc`` and hand the result to ``self.distribute``.

     Side effect: rebinds the module-level ``log`` to the newly opened log
     file.  The handle is not closed here.
     NOTE(review): presumably ``logging()`` writes to ``log`` for the rest of
     the process lifetime -- confirm before adding a close.
     NOTE(review): ``arguments`` is not defined in this method; it is assumed
     to be a module-level name -- confirm.
     """
     global LOG_DIR, log
     self.parse_args(arguments)
     log = open("%s/groupby-%s-%s.log" % (LOG_DIR, gethostname(), getCurrentTime()), "wb")

     host, port = self.node[0], self.node[1]
     queue = cqueue.cqueue(host, port)
     queue.listen()
     try:
         # Per the original author's note, data_list is
         # [data1, data2, data3, ...] where each item carries an ``id`` and
         # its ``data`` as a string.
         data_list = queue.get(self.num_of_children)
     finally:
         # Release the queue even when receiving fails part-way.
         queue.close()
     logging("GET SOURCE DATA")

     result = self.sort_base_proc(data_list)
     logging("GET GROUPED DATA")

     self.distribute(result)
     logging("--------gourp node %s is finished--------" % (self.id))
Esempio n. 2
0
    def distribute(self, data):
        """Deliver ``data`` to the destination selected by ``self.dest``.

        Destinations handled:

        * ``conf.DATA_TO_CLIENTS``: write every row of ``data`` followed by a
          newline to ``conf.OUTPUT_DIR + self.dest``, then send an ACK
          message to the master at ``(conf.MASTER_NAME, conf.MASTER_PORT)``.
        * ``conf.DATA_TO_ANO_OP``: send the data through ``cqueue`` to the
          destination(s) in ``self.p_node`` (a mapping of port -> node); the
          first message on every queue lists ``self.output``.
        * ``conf.DATA_TO_ONE_CLIENT``: send a ``DATA`` marker and the buffer
          contents to ``self.client``, then notify the master.
        * ``conf.DATA_TO_DB``: hand the data to ``dload_client`` to be loaded
          into ``self.dest_db`` / ``self.dest_table``, then notify the master.
        * anything else: send the stripped buffer contents to one randomly
          chosen address from ``self.client_sock``, then notify the master.

        NOTE(review): most branches call ``data.getvalue()`` (a StringIO-like
        buffer) while others iterate over ``data`` or take ``len(data)``;
        confirm what callers actually pass for each destination.
        """
        sep = conf.SEPERATOR
        if self.dest == conf.DATA_TO_CLIENTS:
            # This is the final result: store it in a file.
            dest_file = conf.OUTPUT_DIR + self.dest
            raw = io.FileIO(dest_file, 'wb', closefd=True)
            # The context manager closes the writer (and the underlying file)
            # even if a write fails.
            with io.BufferedWriter(raw, buffer_size=65536) as bw:
                for row in data:
                    bw.write(row)
                    bw.write('\n')
            ack = '%s:%s:%s:%s' % (conf.ACK, conf.DATA_NODE, self.cqid, gethostname())
            self._send_framed((conf.MASTER_NAME, int(conf.MASTER_PORT)), ack)
        elif self.dest == conf.DATA_TO_ANO_OP:
            # This is intermediate data: pipeline it.
            num_of_dest = len(self.p_node)
            if num_of_dest == 1:
                port = next(iter(self.p_node))
                queue = cqueue.cqueue(self.p_node[port], port)
                queue.connect()
                try:
                    # The first message is the output attrs of this sql.
                    queue.put(self._output_header(sep))
                    queue.put(data.getvalue())
                finally:
                    queue.close()
            elif num_of_dest > 1:
                self._send_partitioned(data, sep, num_of_dest)
        elif self.dest == conf.DATA_TO_ONE_CLIENT:
            self._send_framed(self.client, 'DATA', data.getvalue())
            self.notify_to_master()
        elif self.dest == conf.DATA_TO_DB:
            col_sep = self.db_col_sep
            row_sep = self.db_row_sep
            master = (self.master_name, self.master_port)
            dload_client.dload_client().load_internal(
                master, self.cqid, gethostname(), self.dest_db,
                self.dest_table, data, 1, self.fashion, self.hash_key,
                self.hash_key_pos, col_sep, row_sep, col_sep, False, "0",
                LOG_DIR)
            self.notify_to_master()
        else:
            random_num = random.randint(0, len(self.client_sock) - 1)
            addr = self.client_sock[random_num]
            self._send_framed(addr, data.getvalue().strip())
            self.notify_to_master()

    def _output_header(self, sep):
        """Return the entries of ``self.output``, each followed by ``sep``."""
        return ''.join('%s%s' % (out, sep) for out in self.output)

    def _send_framed(self, addr, *payloads):
        """Connect to ``addr`` and send each payload with a length prefix.

        Every payload is written as its length right-aligned in 10
        characters, followed by the payload itself.  ``sendall`` is used so a
        short write cannot truncate a frame, and the socket is closed even if
        connecting or sending fails.
        """
        sock = socket(AF_INET, SOCK_STREAM)
        try:
            sock.connect(addr)
            for payload in payloads:
                sock.sendall('%10s%s' % (len(payload), payload))
        finally:
            sock.close()

    def _send_partitioned(self, data, sep, num_of_dest):
        """Spread ``data`` over one queue per entry of ``self.p_node``.

        Every queue first receives the output-attribute header.  All queues
        that were connected are closed on exit, including on failure.
        """
        header = self._output_header(sep)
        queue_list = []
        try:
            for port in self.p_node:
                queue = cqueue.cqueue(self.p_node[port], port)
                queue.connect()
                # Track the queue before the first put so it is closed on error.
                queue_list.append(queue)
                # The first message is the output attrs of this sql.
                queue.put(header)

            if self.split_key is not None:
                # Partition in hash fashion on the split key's column.
                # NOTE(review): ``columns`` and ``rs`` are not defined in this
                # method; unless they are module-level names this branch
                # raises NameError.  Left unchanged because the intended
                # sources cannot be determined from here.
                split_key = self.split_key
                if columns[0].find('.') == -1 and len(split_key.split('.')) == 2:
                    pos = columns.index(split_key.split('.')[1])
                else:
                    pos = columns.index(split_key)
                for row in rs:
                    target = queue_list[abs(hash(row[pos])) % num_of_dest]
                    # NOTE(review): the message is built from all of ``data``,
                    # not from ``row`` -- looks unintended; confirm.
                    target.put(''.join('%s%s' % (str(r), sep) for r in data))
            else:
                # The destination is picked by index modulo the number of
                # destinations (the original comment calls this range fashion).
                for i in range(len(data)):
                    target = queue_list[i % num_of_dest]
                    # NOTE(review): the message is built from all of ``data``,
                    # not from item ``i`` -- looks unintended; confirm.
                    target.put(''.join('%s%s' % (str(r), sep) for r in data))
        finally:
            for queue in queue_list:
                queue.close()