def _write_text(self, path, write_mode):
    """
    Registers a text sink for this DataSet.

    Builds an OperationInfo describing the sink, links it to this set's
    operation info as its parent and records it on both the parent's sink
    list and the environment's sink list.

    :param path: The path stored on the sink's operation info.
    :param write_mode: The write mode stored on the sink's operation info.
    """
    sink_info = OperationInfo()
    sink_info.identifier = _Identifier.SINK_TEXT
    sink_info.parent = self._info
    sink_info.path = path
    sink_info.write_mode = write_mode

    # The sink is tracked by the producing operation and by the environment.
    producer_sinks = self._info.sinks
    producer_sinks.append(sink_info)
    environment_sinks = self._env._sinks
    environment_sinks.append(sink_info)
def _write_csv(self, path, line_delimiter, field_delimiter, write_mode):
    """
    Registers a CSV sink for this DataSet.

    Builds an OperationInfo describing the sink, links it to this set's
    operation info as its parent and records it on both the parent's sink
    list and the environment's sink list.

    :param path: The path stored on the sink's operation info.
    :param line_delimiter: Stored as the sink's line delimiter.
    :param field_delimiter: Stored as the sink's field delimiter.
    :param write_mode: The write mode stored on the sink's operation info.
    """
    sink_info = OperationInfo()
    sink_info.identifier = _Identifier.SINK_CSV
    sink_info.path = path
    sink_info.parent = self._info
    sink_info.delimiter_field = field_delimiter
    sink_info.delimiter_line = line_delimiter
    sink_info.write_mode = write_mode

    # The sink is tracked by the producing operation and by the environment.
    producer_sinks = self._info.sinks
    producer_sinks.append(sink_info)
    environment_sinks = self._env._sinks
    environment_sinks.append(sink_info)
def _write_text(self, path, write_mode):
    """
    Registers a text sink for this DataSet and returns its DataSink handle.

    Builds an OperationInfo describing the sink, wraps it in a DataSink bound
    to this set's environment, links it to this set's operation info as its
    parent and records it on both the parent's sink list and the
    environment's sink list.

    :param path: The path stored on the sink's operation info.
    :param write_mode: The write mode stored on the sink's operation info.
    :return: The DataSink wrapping the newly created sink operation.
    """
    sink_info = OperationInfo()
    sink = DataSink(self._env, sink_info)

    sink_info.identifier = _Identifier.SINK_TEXT
    sink_info.parent = self._info
    sink_info.path = path
    sink_info.write_mode = write_mode

    # NOTE(review): the producing operation's parallelism is overwritten with
    # the freshly created sink's value here -- confirm this direction is intended.
    self._info.parallelism = sink_info.parallelism

    # The sink is tracked by the producing operation and by the environment.
    producer_sinks = self._info.sinks
    producer_sinks.append(sink_info)
    environment_sinks = self._env._sinks
    environment_sinks.append(sink_info)
    return sink
def write_text(self, path, write_mode=WriteMode.NO_OVERWRITE):
    """
    Writes a DataSet as a text file to the specified location.

    The sink is described by a new OperationInfo that is attached to this
    set's operation info and to the environment's list of sinks.

    :param path: The path pointing to the location the text file is written to.
    :param write_mode: OutputFormat.WriteMode value, indicating whether files should be overwritten
    """
    sink_info = OperationInfo()
    sink_info.identifier = _Identifier.SINK_TEXT
    sink_info.parent = self._info
    sink_info.path = path
    sink_info.write_mode = write_mode

    # The sink is tracked by the producing operation and by the environment.
    producer_sinks = self._info.sinks
    producer_sinks.append(sink_info)
    environment_sinks = self._env._sinks
    environment_sinks.append(sink_info)
def read_text(self, path):
    """
    Creates a DataSet that represents the Strings produced by reading the given file line wise.

    The file will be read with the system's default character set.

    :param path: The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
    :return: A DataSet that represents the data read from the given file as text lines.
    """
    source_info = OperationInfo()
    # The DataSet wrapper is created before the source description is filled in.
    source_set = DataSet(self, source_info)

    source_info.identifier = _Identifier.SOURCE_TEXT
    source_info.path = path

    # Record the source on this environment.
    self._sources.append(source_info)
    return source_set
def read_custom(self, path, filter, splits, format):
    """
    Creates a DataSet using a custom input format that is executed directly in the Python process.

    :param path: The path stored on the source's operation info.
    :param filter: Stored unchanged as the source's ``filter`` attribute.
    :param splits: Stored as the source's ``computeSplits`` attribute.
    :param format: The input format object; a deep copy of it becomes the source's operator.
    :return: A DataSet wrapping the newly registered custom source.
    """
    source_info = OperationInfo()
    # The DataSet wrapper is created before the source description is filled in.
    source_set = DataSet(self, source_info)

    source_info.identifier = _Identifier.SOURCE_CUSTOM
    source_info.name = "PythonInputFormat"
    source_info.path = path
    source_info.filter = filter
    source_info.computeSplits = splits
    # Deep copy so the stored operator is independent of the caller's object.
    format_copy = copy.deepcopy(format)
    source_info.operator = format_copy
    source_info.types = _createArrayTypeInfo()

    # Record the source on this environment.
    self._sources.append(source_info)
    return source_set
def read_csv(self, path, types, line_delimiter="\n", field_delimiter=','):
    """
    Create a DataSet that represents the tuples produced by reading the given CSV file.

    :param path: The path of the CSV file.
    :param types: Specifies the types for the CSV fields.
    :param line_delimiter: Stored as the source's line delimiter.
    :param field_delimiter: Stored as the source's field delimiter.
    :return: A DataSet wrapping the newly registered CSV source.
    """
    source_info = OperationInfo()
    # The DataSet wrapper is created before the source description is filled in.
    source_set = DataSet(self, source_info)

    source_info.identifier = _Identifier.SOURCE_CSV
    source_info.delimiter_line = line_delimiter
    source_info.delimiter_field = field_delimiter
    source_info.path = path
    source_info.types = types

    # Record the source on this environment.
    self._sources.append(source_info)
    return source_set
def write_csv(self, path, line_delimiter="\n", field_delimiter=',', write_mode=WriteMode.NO_OVERWRITE):
    """
    Writes a Tuple DataSet as a CSV file to the specified location.

    Note: Only a Tuple DataSet can be written as a CSV file.

    :param path: The path pointing to the location the CSV file is written to.
    :param line_delimiter: Stored as the sink's line delimiter.
    :param field_delimiter: Stored as the sink's field delimiter.
    :param write_mode: OutputFormat.WriteMode value, indicating whether files should be overwritten
    """
    sink_info = OperationInfo()
    sink_info.identifier = _Identifier.SINK_CSV
    sink_info.path = path
    sink_info.parent = self._info
    sink_info.delimiter_field = field_delimiter
    sink_info.delimiter_line = line_delimiter
    sink_info.write_mode = write_mode

    # The sink is tracked by the producing operation and by the environment.
    producer_sinks = self._info.sinks
    producer_sinks.append(sink_info)
    environment_sinks = self._env._sinks
    environment_sinks.append(sink_info)