コード例 #1
0
ファイル: DataSet.py プロジェクト: jianghe2020/flink
 def _distinct(self, fields):
     """
     Registers a DISTINCT operation on this DataSet for the given key fields.

     :param fields: The key fields; stored unchanged as the keys of the new operation.
     :return: A DataSet wrapping the newly registered operation.
     """
     # The type info of *this* set is overwritten based on the number of key fields.
     self._info.types = _createKeyValueTypeInfo(len(fields))
     distinct_op = OperationInfo()
     result = DataSet(self._env, distinct_op)
     distinct_op.identifier = _Identifier.DISTINCT
     distinct_op.parent = self._info
     distinct_op.keys = fields
     # Link the new operation into the parent and into the environment-wide list.
     self._info.children.append(distinct_op)
     self._env._sets.append(distinct_op)
     return result
コード例 #2
0
 def _distinct(self, fields):
     """
     Creates a child operation marked as DISTINCT and attaches it to this DataSet.

     :param fields: The key fields assigned to the child operation.
     :return: The DataSet built around the child operation.
     """
     parent_info = self._info
     # Replace the parent's type info with one sized to the number of key fields.
     parent_info.types = _createKeyValueTypeInfo(len(fields))
     op = OperationInfo()
     distinct_set = DataSet(self._env, op)
     op.identifier = _Identifier.DISTINCT
     op.parent = parent_info
     op.keys = fields
     parent_info.children.append(op)
     # The environment keeps its own list of every registered operation.
     self._env._sets.append(op)
     return distinct_set
コード例 #3
0
ファイル: DataSet.py プロジェクト: chiwanpark/flink
 def _group_by(self, keys):
     """
     Registers a GROUP operation on this DataSet and returns the grouping built on it.

     :param keys: The grouping keys; stored unchanged on the new operation.
     :return: An UnsortedGrouping wrapping the new operation and its chain.
     """
     group_op = OperationInfo()
     # The chain list is handed to the grouping first and filled afterwards,
     # so both sides share the same list object.
     chain = []
     grouping = UnsortedGrouping(self._env, group_op, chain)
     group_op.identifier = _Identifier.GROUP
     group_op.parent = self._info
     group_op.keys = keys
     chain.append(group_op)
     # Link the operation into the parent and into the environment-wide list.
     self._info.children.append(group_op)
     self._env._sets.append(group_op)
     return grouping
コード例 #4
0
ファイル: DataSet.py プロジェクト: tarunnar/pyflink
 def _group_by(self, keys):
     """
     Creates a child operation marked as GROUP and attaches it to this DataSet.

     :param keys: The keys assigned to the child operation.
     :return: The UnsortedGrouping constructed around the child operation.
     """
     parent_info = self._info
     op = OperationInfo()
     op_chain = []
     unsorted = UnsortedGrouping(self._env, op, op_chain)
     op.identifier = _Identifier.GROUP
     op.parent = parent_info
     op.keys = keys
     # The grouping already holds a reference to this list; the operation is
     # added to it only after its fields have been populated.
     op_chain.append(op)
     parent_info.children.append(op)
     self._env._sets.append(op)
     return unsorted
コード例 #5
0
ファイル: DataSet.py プロジェクト: jianghe2020/flink
    def _partition_by_hash(self, fields):
        """
        Registers a hash-partitioning operation on this DataSet for the given key fields.

        Important: This operation shuffles the whole DataSet over the network and can take
        a significant amount of time.

        :param fields: The field indexes on which the DataSet is hash-partitioned.
        :return: The partitioned DataSet.
        """
        # The type info of *this* set is overwritten based on the number of key fields.
        self._info.types = _createKeyValueTypeInfo(len(fields))
        partition_op = OperationInfo()
        partitioned = DataSet(self._env, partition_op)
        partition_op.identifier = _Identifier.PARTITION_HASH
        partition_op.parent = self._info
        partition_op.keys = fields
        # Link the new operation into the parent and into the environment-wide list.
        self._info.children.append(partition_op)
        self._env._sets.append(partition_op)
        return partitioned
コード例 #6
0
    def _partition_by_hash(self, fields):
        """
        Hash-partitions this DataSet on the given key fields.

        Important: The whole DataSet is shuffled over the network, which can take a
        significant amount of time.

        :param fields: The field indexes on which the DataSet is hash-partitioned.
        :return: The partitioned DataSet.
        """
        parent_info = self._info
        # Replace the parent's type info with one sized to the number of key fields.
        parent_info.types = _createKeyValueTypeInfo(len(fields))
        op = OperationInfo()
        hashed_set = DataSet(self._env, op)
        op.identifier = _Identifier.PARTITION_HASH
        op.parent = parent_info
        op.keys = fields
        parent_info.children.append(op)
        # The environment keeps its own list of every registered operation.
        self._env._sets.append(op)
        return hashed_set
コード例 #7
0
    def project(self, *fields):
        """
        Applies a Project transformation on a Tuple DataSet.

        Note: Only Tuple DataSets can be projected. Each Tuple of the DataSet is projected
        onto a (sub)set of its fields.

        :param fields: The field indexes of the input tuples that are retained.
                        The output tuple keeps the fields in the order the indexes are given.
        :return: The projected DataSet.
        """
        projection_op = OperationInfo()
        projected = DataSet(self._env, projection_op)
        projection_op.identifier = _Identifier.PROJECTION
        projection_op.parent = self._info
        # The varargs tuple is stored as-is as the operation's keys.
        projection_op.keys = fields
        # Link the new operation into the parent and into the environment-wide list.
        self._info.children.append(projection_op)
        self._env._sets.append(projection_op)
        return projected
コード例 #8
0
ファイル: DataSet.py プロジェクト: SanthoshPoudapally/flink
    def project(self, *fields):
        """
        Projects each Tuple of this DataSet onto a (sub)set of its fields.

        Note: Only Tuple DataSets can be projected.

        :param fields: The field indexes of the input tuples that are retained.
                        Their order determines the order of fields in the output tuple.
        :return: The projected DataSet.
        """
        parent_info = self._info
        op = OperationInfo()
        projected_set = DataSet(self._env, op)
        op.identifier = _Identifier.PROJECTION
        op.parent = parent_info
        op.keys = fields  # tuple of all positional arguments
        parent_info.children.append(op)
        # The environment keeps its own list of every registered operation.
        self._env._sets.append(op)
        return projected_set
コード例 #9
0
    def group_by(self, *keys):
        """
        Groups a Tuple DataSet using field position keys.

        Note: Field position keys can only be specified for Tuple DataSets.
        The keys name the Tuple fields on which the DataSet is grouped.

        The returned UnsortedGrouping accepts one of the following grouping transformations:
        sort_group() to get a SortedGrouping.
        reduce() to apply a Reduce transformation.
        group_reduce() to apply a GroupReduce transformation.

        :param keys: One or more field positions on which the DataSet will be grouped.
        :return: A Grouping on which a transformation needs to be applied to obtain a transformed DataSet.
        """
        group_op = OperationInfo()
        # The chain list is handed to the grouping first and filled afterwards,
        # so both sides share the same list object.
        chain = []
        grouping = UnsortedGrouping(self._env, group_op, chain)
        group_op.identifier = _Identifier.GROUP
        group_op.parent = self._info
        group_op.keys = keys
        chain.append(group_op)
        # Link the operation into the parent and into the environment-wide list.
        self._info.children.append(group_op)
        self._env._sets.append(group_op)
        return grouping
コード例 #10
0
ファイル: DataSet.py プロジェクト: SanthoshPoudapally/flink
    def group_by(self, *keys):
        """
        Groups this Tuple DataSet on the given field positions.

        Note: Field position keys can only be specified for Tuple DataSets; they select
        the Tuple fields on which the DataSet is grouped.

        An UnsortedGrouping is returned, on which one of these transformations can be applied:
        sort_group() to get a SortedGrouping.
        reduce() to apply a Reduce transformation.
        group_reduce() to apply a GroupReduce transformation.

        :param keys: One or more field positions on which the DataSet will be grouped.
        :return: A Grouping on which a transformation needs to be applied to obtain a transformed DataSet.
        """
        parent_info = self._info
        op = OperationInfo()
        op_chain = []
        unsorted = UnsortedGrouping(self._env, op, op_chain)
        op.identifier = _Identifier.GROUP
        op.parent = parent_info
        op.keys = keys  # tuple of all positional arguments
        # The grouping already holds a reference to this list; the operation is
        # added to it only after its fields have been populated.
        op_chain.append(op)
        parent_info.children.append(op)
        self._env._sets.append(op)
        return unsorted