Beispiel #1
0
 def _configure(self, input_file, output_file, port, env):
     """
     Sets up the connection, both input iterators, the co-group iterator and the runtime context.

     :param input_file: passed through to the connection constructor
     :param output_file: passed through to the connection constructor
     :param port: passed through to the connection constructor
     :param env: passed through to the iterators and the collector
     """
     connection = Connection.TwinBufferingTCPMappedFileConnection(input_file, output_file, port)
     self._connection = connection

     # Two iterators share the same connection; the trailing 0/1 selects which input each one reads.
     first_input = Iterator.Iterator(connection, env, 0)
     self._iterator = first_input
     second_input = Iterator.Iterator(connection, env, 1)
     self._iterator2 = second_input

     self._cgiter = Iterator.CoGroupIterator(first_input, second_input, self._keys1, self._keys2)

     # NOTE(review): self._collector is read here before _configure_chain runs;
     # presumably it is initialised elsewhere - verify against the class.
     self.context = RuntimeContext.RuntimeContext(first_input, self._collector)
     self._configure_chain(Collector.Collector(connection, env))
Beispiel #2
0
 def _configure(self, input_file, output_file, port, env, info, subtask_index):
     """
     Sets up the connection, both input iterators, the co-group iterator, the collector and the runtime context.

     If ``info.chained_info`` is set, the chained operator is configured with this
     operator's context and collector and then takes the collector's place.

     :param input_file: passed through to the connection constructor
     :param output_file: passed through to the connection constructor
     :param port: passed through to the connection constructor
     :param env: passed through to the iterators and the collector
     :param info: operation info; passed to the collector and inspected for ``chained_info``
     :param subtask_index: passed through to the runtime context
     """
     connection = Connection.TwinBufferingTCPMappedFileConnection(input_file, output_file, port)
     self._connection = connection

     # Two iterators share the same connection; the trailing 0/1 selects which input each one reads.
     first_input = Iterator.Iterator(connection, env, 0)
     self._iterator = first_input
     second_input = Iterator.Iterator(connection, env, 1)
     self._iterator2 = second_input

     self._cgiter = Iterator.CoGroupIterator(first_input, second_input, self._keys1, self._keys2)

     collector = Collector.Collector(connection, env, info)
     self._collector = collector
     self.context = RuntimeContext.RuntimeContext(first_input, collector, subtask_index)

     chained = info.chained_info
     if chained is not None:
         # The chained operator receives the real collector and then replaces it as this operator's output.
         chained.operator._configure_chain(self.context, collector, chained)
         self._collector = chained.operator
Beispiel #3
0
 def _configure(self, input_file, output_file, port):
     """
     Sets up the connection, the input iterator and the runtime context, then configures the chain.

     :param input_file: passed through to the connection constructor
     :param output_file: passed through to the connection constructor
     :param port: passed through to the connection constructor
     """
     connection = Connection.BufferingTCPMappedFileConnection(input_file, output_file, port)
     self._connection = connection

     records = Iterator.Iterator(connection)
     self._iterator = records

     # NOTE(review): self._collector is read here before _configure_chain runs;
     # presumably it is initialised elsewhere - verify against the class.
     self.context = RuntimeContext.RuntimeContext(records, self._collector)
     self._configure_chain(Collector.Collector(connection))
 def _configure(self, input_file, output_file, port, env, info):
     """
     Runs the base configuration and then selects the run routine based on ``info.key1``.

     With a key, the grouped routine is selected and a group iterator over
     ``self._iterator`` is created; without one (``None``) the all-group routine is selected.

     :param input_file: forwarded to the base ``_configure``
     :param output_file: forwarded to the base ``_configure``
     :param port: forwarded to the base ``_configure``
     :param env: forwarded to the base ``_configure``
     :param info: forwarded to the base ``_configure``; its ``key1`` decides the run routine
     """
     super(GroupReduceFunction, self)._configure(input_file, output_file, port, env, info)

     grouping_key = info.key1
     if grouping_key is not None:
         self._run = self._run_grouped_group_reduce
         self._group_iterator = Iterator.GroupIterator(self._iterator, grouping_key)
     else:
         self._run = self._run_all_group_reduce
Beispiel #5
0
    def execute(self, local=False, debug=False):
        """
        Triggers the program execution.

        The environment will execute all parts of the program that have resulted in a "sink" operation.

        The first line read from stdin selects the mode. If it is "plan", the next line is
        read as a port, the plan is sent over a TCP connection to that port and the received
        result is returned. Otherwise the port, set id, input path and output path are read
        from stdin (one per line) and the operator of the matching set is configured and run.

        If running the operator fails, stdout/stderr are flushed and the value -2 is sent
        (packed as a big-endian int) over the operator's socket, or over a fresh socket to
        localhost:port when the operator has no connection yet; the original exception is
        then re-raised.

        :param local: stored in ``self._local_mode``; forced to True when ``debug`` is set
        :param debug: stored in ``self._debug_mode``
        :return: the result of ``self._receive_result()`` in plan mode, otherwise None
        """
        if debug:
            local = True
        self._local_mode = local
        self._debug_mode = debug
        self._optimize_plan()

        plan_mode = sys.stdin.readline().rstrip('\n') == "plan"

        if plan_mode:
            port = int(sys.stdin.readline().rstrip('\n'))
            self._connection = Connection.PureTCPConnection(port)
            try:
                self._iterator = Iterator.PlanIterator(self._connection, self)
                self._collector = Collector.PlanCollector(self._connection, self)
                self._send_plan()
                return self._receive_result()
            finally:
                # Release the connection even when sending the plan or reading the result fails.
                self._connection.close()

        import struct
        operator = None
        # Stays None until the port line was parsed, so the failure handler
        # below never touches an unbound name.
        port = None
        try:
            port = int(sys.stdin.readline().rstrip('\n'))

            set_id = int(sys.stdin.readline().rstrip('\n'))
            input_path = sys.stdin.readline().rstrip('\n')
            output_path = sys.stdin.readline().rstrip('\n')

            used_set = None
            for candidate in self._sets:
                if candidate.id == set_id:
                    used_set = candidate
                    operator = candidate.operator
            operator._configure(input_path, output_path, port, self,
                                used_set)
            operator._go()
            operator._close()
            sys.stdout.flush()
            sys.stderr.flush()
        except BaseException:
            # Deliberately catches everything: the failure must be signalled
            # before the exception is re-raised unchanged.
            sys.stdout.flush()
            sys.stderr.flush()
            # The operator may have failed before its connection was created.
            connection = getattr(operator, "_connection", None)
            if connection is not None:
                connection._socket.send(struct.pack(">i", -2))
            elif port is not None:
                failure_socket = SOCKET.socket(family=SOCKET.AF_INET,
                                               type=SOCKET.SOCK_STREAM)
                try:
                    failure_socket.connect(
                        (SOCKET.gethostbyname("localhost"), port))
                    failure_socket.send(struct.pack(">i", -2))
                finally:
                    failure_socket.close()
            raise
Beispiel #6
0
 def _configure(self, input_file, output_file, port, env, info, task_id):
     self._connection = Connection.BufferingTCPMappedFileConnection(input_file, output_file, port)
     self._iterator = Iterator.Iterator(self._connection, env)
     self._collector = Collector.Collector(self._connection, env, info)
     self.context = RuntimeContext.RuntimeContext(self._iterator, self._collector, task_id)
     self._env = env
     if info.chained_info is not None:
         info.chained_info.operator._configure_chain(self.context, self._collector, info.chained_info)
         self._collector = info.chained_info.operator
Beispiel #7
0
    def execute(self, local=False):
        """
        Triggers the program execution.

        The environment will execute all parts of the program that have resulted in a "sink" operation.

        When ``self._container.is_planning()`` is true, a port is read from stdin, the plan is
        sent over a TCP connection to that port and the received result is returned.
        Otherwise, if ``self._container.should_execute(self)`` is true, the set id, port,
        subtask index, mmap size, input path and output path are read from stdin (one per
        line) and the operator of the matching set is configured and run.

        If running the operator fails, stdout/stderr are flushed and the value -2 is sent
        (packed as a big-endian int) over the operator's socket, or over a fresh socket to
        localhost:port when the operator has no connection but the port is already known;
        the original exception is then re-raised.

        :param local: not used by this method
        :return: the result of ``self._receive_result()`` when planning, otherwise None
        """
        self._optimize_plan()

        if self._container.is_planning():
            port = int(sys.stdin.readline().rstrip('\n'))
            self._connection = Connection.PureTCPConnection(port)
            try:
                self._iterator = Iterator.PlanIterator(self._connection, self)
                self._collector = Collector.PlanCollector(self._connection, self)
                self._send_plan()
                return self._receive_result()
            finally:
                # Release the connection even when sending the plan or reading the result fails.
                self._connection.close()

        import struct
        operator = None
        port = None
        try:
            if self._container.should_execute(self):
                set_id = int(sys.stdin.readline().rstrip('\n'))

                port = int(sys.stdin.readline().rstrip('\n'))
                subtask_index = int(sys.stdin.readline().rstrip('\n'))
                mmap_size = int(sys.stdin.readline().rstrip('\n'))
                input_path = sys.stdin.readline().rstrip('\n')
                output_path = sys.stdin.readline().rstrip('\n')

                used_set = None
                for candidate in self._sets:
                    if candidate.id == set_id:
                        used_set = candidate
                        operator = candidate.operator
                operator._configure(input_path, output_path, mmap_size,
                                    port, self, used_set, subtask_index)
                operator._go()
                operator._close()
                sys.stdout.flush()
                sys.stderr.flush()
        except BaseException:
            # Deliberately catches everything: the failure must be signalled
            # before the exception is re-raised unchanged.
            sys.stdout.flush()
            sys.stderr.flush()
            # getattr guards against an operator that failed before its
            # _connection attribute was ever assigned.
            connection = getattr(operator, "_connection", None)
            if connection is not None:
                connection._socket.send(struct.pack(">i", -2))
            elif port is not None:
                failure_socket = SOCKET.socket(family=SOCKET.AF_INET,
                                               type=SOCKET.SOCK_STREAM)
                try:
                    failure_socket.connect(
                        (SOCKET.gethostbyname("localhost"), port))
                    failure_socket.send(struct.pack(">i", -2))
                finally:
                    failure_socket.close()
            raise
Beispiel #8
0
    def computeSplits(self, env, con):
        """
        Reads the split parameters from the connection and delegates to ``createInputSplits``.

        :param env: passed through to the plan iterator and the split collector
        :param con: connection passed through to the plan iterator and the split collector
        """
        plan_reader = Iterator.PlanIterator(con, env)
        split_collector = Collector.SplitCollector(con, env)

        # Order-sensitive reads: first the minimum number of splits, then the path.
        requested_splits = plan_reader.next()
        source_path = plan_reader.next()

        self.createInputSplits(requested_splits, source_path, split_collector)
        split_collector._close()
 def _configure(self, input_file, output_file, port):
     """
     Sets up the connection, iterator and context, either for combining or for grouped execution.

     When ``self._combine`` is truthy, a collector is created directly and ``_run`` is
     pointed at ``_run_combine``. Otherwise a group iterator over ``self._keys`` is created
     and the chain is configured with a new collector. ``_open`` is called in both cases.

     :param input_file: passed through to the connection constructor
     :param output_file: passed through to the connection constructor
     :param port: passed through to the connection constructor
     """
     # Connection and input iterator are built the same way in both modes.
     connection = Connection.BufferingTCPMappedFileConnection(input_file, output_file, port)
     self._connection = connection
     records = Iterator.Iterator(connection)
     self._iterator = records

     if not self._combine:
         self._group_iterator = Iterator.GroupIterator(records, self._keys)
         self.context = RuntimeContext.RuntimeContext(records, self._collector)
         self._configure_chain(Collector.Collector(connection))
     else:
         collector = Collector.Collector(connection)
         self._collector = collector
         self.context = RuntimeContext.RuntimeContext(records, collector)
         self._run = self._run_combine

     self._open()
Beispiel #10
0
 def _configure(self, input_file, output_file, port, env, info,
                subtask_index):
     """
     Runs the base configuration and then selects the run routine based on ``info.key1``.

     With a non-empty ``info.key1`` the grouped routine is selected and a group iterator
     over ``self._iterator`` is created; with an empty one the all-reduce routine is selected.

     :param input_file: forwarded to the base ``_configure``
     :param output_file: forwarded to the base ``_configure``
     :param port: forwarded to the base ``_configure``
     :param env: forwarded to the base ``_configure``
     :param info: forwarded to the base ``_configure``; its ``key1`` decides the run routine
     :param subtask_index: forwarded to the base ``_configure``
     """
     super(ReduceFunction, self)._configure(
         input_file, output_file, port, env, info, subtask_index)

     key_fields = info.key1
     if len(key_fields) != 0:
         self._run = self._run_grouped_reduce
         self._group_iterator = Iterator.GroupIterator(self._iterator, key_fields)
     else:
         self._run = self._run_all_reduce
Beispiel #11
0
 def _sort_and_combine(self):
     """
     Groups the buffered values by key, reduces each group pairwise and emits one value per key.

     Groups are processed in ascending key order. Within a group, ``self.combine`` is
     folded over the values from left to right. The buffer ``self._values`` is reset to
     an empty list afterwards.
     """
     pending = self._values
     reduce_pair = self.combine
     out = self._collector
     key_of = self._extract_keys

     buckets = defaultdict(list)
     for record in pending:
         buckets[key_of(record)].append(record)

     for key in sorted(buckets):
         group = Iterator.ListIterator(buckets[key])
         # Every bucket holds at least one record, so the first next() seeds the fold.
         combined = group.next()
         while group.has_next():
             combined = reduce_pair(combined, group.next())
         out.collect(combined)

     self._values = []
Beispiel #12
0
 def _sort_and_combine(self):
     """
     Groups the buffered values by key, sorts each group and passes it to ``self.combine``.

     Groups are processed in ascending key order. Each group is sorted in place according
     to ``self._sort_ops`` and handed to ``self.combine`` together with the collector; if
     the call returns something other than None, every returned item is collected as
     well. The buffer ``self._values`` is reset to an empty list afterwards.
     """
     pending = self._values
     combine_group = self.combine
     out = self._collector
     key_of = self._extract_keys

     buckets = defaultdict(list)
     for record in pending:
         buckets[key_of(record)].append(record)

     for key in sorted(buckets):
         group = buckets[key]
         # Applying the sort ops last-to-first with a stable sort makes the first op the primary order.
         for sort_op in reversed(self._sort_ops):
             descending = sort_op[1] == Order.DESCENDING
             group.sort(key=lambda rec: rec[sort_op[0]], reverse=descending)
         produced = combine_group(Iterator.ListIterator(group), out)
         if produced is not None:
             for item in produced:
                 out.collect(item)

     self._values = []