예제 #1
0
 def _send_sinks(self):
     """Pass the description of each sink in ``self._sinks`` to the collector.

     For every sink the identifier and the parent's id are collected first,
     then the fields belonging to its kind: path, both delimiters and write
     mode for CSV; path and write mode for text; the ``TO_ERR`` field for
     print. No branch handles any other identifier.
     """
     for entry in self._sinks:
         kind = entry[_Fields.IDENTIFIER]
         emit = self._collector.collect
         # Header common to every sink kind.
         emit(kind)
         emit(entry[_Fields.PARENT][_Fields.ID])
         for matches in Switch(kind):
             if matches(_Identifier.SINK_CSV):
                 emit(entry[_Fields.PATH])
                 emit(entry[_Fields.DELIMITER_FIELD])
                 emit(entry[_Fields.DELIMITER_LINE])
                 emit(entry[_Fields.WRITE_MODE])
                 break
             elif matches(_Identifier.SINK_TEXT):
                 emit(entry[_Fields.PATH])
                 emit(entry[_Fields.WRITE_MODE])
                 break
             elif matches(_Identifier.SINK_PRINT):
                 emit(entry[_Fields.TO_ERR])
                 break
예제 #2
0
 def _send_sinks(self):
     """Pass the description of each sink in ``self._sinks`` to the collector.

     For every sink the identifier and the parent's id are collected first,
     then the attributes belonging to its kind: path, both delimiters and
     write mode for CSV; path and write mode for text; ``to_err`` for print.
     No branch handles any other identifier.
     """
     for entry in self._sinks:
         kind = entry.identifier
         emit = self._collector.collect
         # Header common to every sink kind.
         emit(kind)
         emit(entry.parent.id)
         for matches in Switch(kind):
             if matches(_Identifier.SINK_CSV):
                 emit(entry.path)
                 emit(entry.delimiter_field)
                 emit(entry.delimiter_line)
                 emit(entry.write_mode)
                 break
             elif matches(_Identifier.SINK_TEXT):
                 emit(entry.path)
                 emit(entry.write_mode)
                 break
             elif matches(_Identifier.SINK_PRINT):
                 emit(entry.to_err)
                 break
예제 #3
0
 def _send_sources(self):
     """Pass the description of each source in ``self._sources`` to the collector.

     For every source the identifier and the source's own id are collected
     first, then the attributes belonging to its kind: path, both delimiters
     and types for CSV; path for text; for a value source the number of
     values followed by each value. No branch handles any other identifier.
     """
     for entry in self._sources:
         kind = entry.identifier
         emit = self._collector.collect
         # Header common to every source kind.
         emit(kind)
         emit(entry.id)
         for matches in Switch(kind):
             if matches(_Identifier.SOURCE_CSV):
                 emit(entry.path)
                 emit(entry.delimiter_field)
                 emit(entry.delimiter_line)
                 emit(entry.types)
                 break
             elif matches(_Identifier.SOURCE_TEXT):
                 emit(entry.path)
                 break
             elif matches(_Identifier.SOURCE_VALUE):
                 # Length prefix first, then the items one by one.
                 emit(len(entry.values))
                 for item in entry.values:
                     emit(item)
                 break
예제 #4
0
 def _send_sources(self):
     """Pass the description of each source in ``self._sources`` to the collector.

     For every source the identifier and the source's own id are collected
     first, then the fields belonging to its kind: path, both delimiters and
     types for CSV; path for text; for a value source the number of values
     followed by each value. No branch handles any other identifier.
     """
     for entry in self._sources:
         kind = entry[_Fields.IDENTIFIER]
         emit = self._collector.collect
         # Header common to every source kind.
         emit(kind)
         emit(entry[_Fields.ID])
         for matches in Switch(kind):
             if matches(_Identifier.SOURCE_CSV):
                 emit(entry[_Fields.PATH])
                 emit(entry[_Fields.DELIMITER_FIELD])
                 emit(entry[_Fields.DELIMITER_LINE])
                 emit(entry[_Fields.TYPES])
                 break
             elif matches(_Identifier.SOURCE_TEXT):
                 emit(entry[_Fields.PATH])
                 break
             elif matches(_Identifier.SOURCE_VALUE):
                 # Length prefix first, then the items one by one.
                 emit(len(entry[_Fields.VALUES]))
                 for item in entry[_Fields.VALUES]:
                     emit(item)
                 break
예제 #5
0
 def _send_operations(self):
     """Pass the description of each operation in ``self._sets`` to the collector.

     For every operation the identifier, its id and its parent's id are
     collected first, followed by the attributes belonging to its kind (see
     the individual branches below).

     Raises:
         KeyError: from the final argument-less ``case()`` branch, which is
             reached when no earlier branch broke out of the switch
             (presumably an identifier none of them handles).
     """
     collect = self._collector.collect

     def collect_projections(projections):
         # Projections go out as a count followed by the first two items of
         # each projection entry.
         collect(len(projections))
         for p in projections:
             collect(p[0])
             collect(p[1])

     # ``op`` rather than ``set``: the latter would shadow the builtin.
     for op in self._sets:
         identifier = op.identifier
         collect(identifier)
         collect(op.id)
         collect(op.parent.id)
         for case in Switch(identifier):
             if case(_Identifier.SORT):
                 collect(op.field)
                 collect(op.order)
                 break
             elif case(_Identifier.GROUP):
                 collect(op.keys)
                 break
             elif case(_Identifier.COGROUP):
                 collect(op.other.id)
                 collect(op.key1)
                 collect(op.key2)
                 collect(op.types)
                 collect(op.name)
                 break
             elif case(_Identifier.CROSS, _Identifier.CROSSH,
                       _Identifier.CROSST):
                 collect(op.other.id)
                 collect(op.types)
                 collect_projections(op.projections)
                 collect(op.name)
                 break
             elif case(_Identifier.REDUCE, _Identifier.GROUPREDUCE):
                 collect(op.types)
                 collect(op.combine)
                 collect(op.name)
                 break
             elif case(_Identifier.JOIN, _Identifier.JOINH,
                       _Identifier.JOINT):
                 collect(op.key1)
                 collect(op.key2)
                 collect(op.other.id)
                 collect(op.types)
                 collect_projections(op.projections)
                 collect(op.name)
                 break
             elif case(_Identifier.MAP, _Identifier.MAPPARTITION,
                       _Identifier.FLATMAP, _Identifier.FILTER):
                 collect(op.types)
                 collect(op.name)
                 break
             elif case(_Identifier.UNION):
                 collect(op.other.id)
                 break
             elif case(_Identifier.PROJECTION):
                 collect(op.keys)
                 break
             elif case():
                 # The message names this method so the failure can be
                 # traced to where it was actually raised.
                 raise KeyError(
                     "Environment._send_operations(): Invalid operation identifier: "
                     + str(identifier))
예제 #6
0
 def _send_operations(self):
     """Pass the description of each operation in ``self._sets`` to the collector.

     For every operation the identifier, its id and its parent's id are
     collected first, followed by the fields belonging to its kind (see the
     individual branches below). Operator objects are passed through
     ``_dump`` and handed to the collector's ``collectBytes``.

     Raises:
         KeyError: from the final argument-less ``case()`` branch, which is
             reached when no earlier branch broke out of the switch
             (presumably an identifier none of them handles); also from any
             field lookup on an operation that lacks the field.
     """
     collect = self._collector.collect
     collect_bytes = self._collector.collectBytes

     def collect_optional_operator(operator):
         # CROSS and JOIN may carry no operator: ``None`` is forwarded as a
         # plain value, anything else is dumped and sent as bytes.
         if operator is None:
             collect(operator)
         else:
             collect_bytes(_dump(operator))

     def collect_projections(projections):
         # Projections go out as a count followed by the first two items of
         # each projection entry.
         collect(len(projections))
         for p in projections:
             collect(p[0])
             collect(p[1])

     # ``op`` rather than ``set``: the latter would shadow the builtin.
     for op in self._sets:
         # Plain subscript: a missing identifier raises KeyError right here
         # rather than being looked up twice.
         identifier = op[_Fields.IDENTIFIER]
         collect(identifier)
         collect(op[_Fields.ID])
         collect(op[_Fields.PARENT][_Fields.ID])
         for case in Switch(identifier):
             if case(_Identifier.SORT):
                 collect(op[_Fields.FIELD])
                 collect(op[_Fields.ORDER])
                 break
             elif case(_Identifier.GROUP):
                 collect(op[_Fields.KEYS])
                 break
             elif case(_Identifier.COGROUP):
                 collect(op[_Fields.OTHER][_Fields.ID])
                 collect(op[_Fields.KEY1])
                 collect(op[_Fields.KEY2])
                 collect_bytes(_dump(op[_Fields.OPERATOR]))
                 collect(op[_Fields.META])
                 collect(op[_Fields.TYPES])
                 collect(op[_Fields.NAME])
                 break
             elif case(_Identifier.CROSS, _Identifier.CROSSH,
                       _Identifier.CROSST):
                 collect(op[_Fields.OTHER][_Fields.ID])
                 collect_optional_operator(op[_Fields.OPERATOR])
                 collect(op[_Fields.META])
                 collect(op[_Fields.TYPES])
                 collect_projections(op[_Fields.PROJECTIONS])
                 collect(op[_Fields.NAME])
                 break
             elif case(_Identifier.REDUCE, _Identifier.GROUPREDUCE):
                 collect_bytes(_dump(op[_Fields.OPERATOR]))
                 collect_bytes(_dump(op[_Fields.COMBINEOP]))
                 collect(op[_Fields.META])
                 collect(op[_Fields.TYPES])
                 collect(op[_Fields.COMBINE])
                 collect(op[_Fields.NAME])
                 break
             elif case(_Identifier.JOIN, _Identifier.JOINH,
                       _Identifier.JOINT):
                 collect(op[_Fields.KEY1])
                 collect(op[_Fields.KEY2])
                 collect(op[_Fields.OTHER][_Fields.ID])
                 collect_optional_operator(op[_Fields.OPERATOR])
                 collect(op[_Fields.META])
                 collect(op[_Fields.TYPES])
                 collect_projections(op[_Fields.PROJECTIONS])
                 collect(op[_Fields.NAME])
                 break
             elif case(_Identifier.MAP, _Identifier.MAPPARTITION,
                       _Identifier.FLATMAP, _Identifier.FILTER):
                 collect_bytes(_dump(op[_Fields.OPERATOR]))
                 collect(op[_Fields.META])
                 collect(op[_Fields.TYPES])
                 collect(op[_Fields.NAME])
                 break
             elif case(_Identifier.UNION):
                 collect(op[_Fields.OTHER][_Fields.ID])
                 break
             elif case(_Identifier.PROJECTION):
                 collect(op[_Fields.KEYS])
                 break
             elif case():
                 # The message names this method so the failure can be
                 # traced to where it was actually raised.
                 raise KeyError(
                     "Environment._send_operations(): Invalid operation identifier: "
                     + str(identifier))
예제 #7
0
 def _send_operations(self):
     """Pass the description of each operation in ``self._sets`` to the collector.

     For every operation the identifier, its id and its parent's id are
     collected first, followed by the attributes belonging to its kind (see
     the individual branches below).

     Raises:
         KeyError: from the final argument-less ``case()`` branch, which is
             reached when no earlier branch broke out of the switch
             (presumably an identifier none of them handles).
     """
     collect = self._collector.collect
     # ``op`` rather than ``set``: the latter would shadow the builtin.
     for op in self._sets:
         identifier = op.identifier
         collect(identifier)
         collect(op.id)
         collect(op.parent.id)
         for case in Switch(identifier):
             if case(_Identifier.REBALANCE):
                 # Nothing beyond the common header is collected.
                 break
             elif case(_Identifier.DISTINCT, _Identifier.PARTITION_HASH):
                 collect(op.keys)
                 break
             elif case(_Identifier.FIRST):
                 collect(op.count)
                 break
             elif case(_Identifier.SORT):
                 collect(op.field)
                 collect(op.order)
                 break
             elif case(_Identifier.GROUP):
                 collect(op.keys)
                 break
             elif case(_Identifier.COGROUP):
                 collect(op.other.id)
                 collect(op.key1)
                 collect(op.key2)
                 collect(op.types)
                 collect(op.name)
                 break
             elif case(_Identifier.CROSS, _Identifier.CROSSH,
                       _Identifier.CROSST):
                 collect(op.other.id)
                 collect(op.uses_udf)
                 collect(op.types)
                 collect(op.name)
                 break
             elif case(_Identifier.REDUCE, _Identifier.GROUPREDUCE):
                 collect(op.types)
                 collect(op.name)
                 break
             elif case(_Identifier.JOIN, _Identifier.JOINH,
                       _Identifier.JOINT):
                 collect(op.key1)
                 collect(op.key2)
                 collect(op.other.id)
                 collect(op.uses_udf)
                 collect(op.types)
                 collect(op.name)
                 break
             elif case(_Identifier.MAP, _Identifier.MAPPARTITION,
                       _Identifier.FLATMAP, _Identifier.FILTER):
                 collect(op.types)
                 collect(op.name)
                 break
             elif case(_Identifier.UNION):
                 collect(op.other.id)
                 break
             elif case(_Identifier.PROJECTION):
                 collect(op.keys)
                 break
             elif case():
                 # The message names this method so the failure can be
                 # traced to where it was actually raised.
                 raise KeyError(
                     "Environment._send_operations(): Invalid operation identifier: "
                     + str(identifier))