Example #1
0
    def to_table_data(self):
        """Yield one ``TableData`` built from the source rows.

        When the loader provides no headers, the first row of the source
        data is used as the header row and the remaining rows become the
        table body. Otherwise the loader's headers are used and every source
        row is treated as data.

        This is a generator, so the checks below run only once the caller
        starts iterating.

        :raises DataError:
            If the header row taken from the source contains an item that
            ``typepy.is_null_string`` reports as null, or if there are no
            data rows.
        """
        if typepy.is_empty_sequence(self._loader.headers):
            headers = self._source_data[0]

            if any(typepy.is_null_string(header) for header in headers):
                # Each fragment ends with a space so the sentences do not run
                # together once the adjacent literals are concatenated.
                raise DataError(
                    "the first line includes empty string item. "
                    "all of the items should contain header name. "
                    "actual={}".format(headers))

            data_matrix = self._source_data[1:]
        else:
            headers = self._loader.headers
            data_matrix = self._source_data

        if not data_matrix:
            raise DataError("data row must be greater or equal than one")

        # The table counter is incremented before the table name is requested.
        self._loader.inc_table_count()

        yield TableData(
            self._loader.make_table_name(),
            headers,
            data_matrix,
            dp_extractor=self._loader.dp_extractor,
            type_hints=self._extract_type_hints(headers),
        )
Example #2
0
    def _to_data_matrix(self):
        """Parse the LTSV input stream into a list of ordered records.

        Each non-empty row is stripped, split on tabs, and every item is
        split into a ``label:value`` pair at the first colon. Double quotes
        surrounding a label are removed before the label is validated.

        Yields a single list of ``OrderedDict`` records (label -> value), one
        per parsed row.

        :raises DataError:
            If an item contains no ``:`` separator.
        :raises InvalidHeaderNameError:
            If a label fails ``pv.validate_ltsv_label``.
        """
        from collections import OrderedDict

        data_matrix = []

        for row_idx, row in enumerate(self._ltsv_input_stream):
            if typepy.is_empty_sequence(row):
                continue

            ltsv_record = OrderedDict()
            for col_idx, ltsv_item in enumerate(row.strip().split("\t")):
                try:
                    # Split at the first colon only: values such as
                    # timestamps or URLs legitimately contain ':'.
                    label, value = ltsv_item.split(":", 1)
                except ValueError:
                    # Unpacking fails only when the item has no ':' at all.
                    raise DataError(
                        "invalid lstv item found: line={}, col={}, item='{}'".
                        format(row_idx, col_idx, ltsv_item))

                label = label.strip('"')

                try:
                    pv.validate_ltsv_label(label)
                except (pv.NullNameError, pv.InvalidCharError):
                    raise InvalidHeaderNameError(
                        "invalid label found (acceptable chars are [0-9A-Za-z_.-]): "
                        "line={}, col={}, label='{}'".format(
                            row_idx, col_idx, label))

                ltsv_record[label] = value

            data_matrix.append(ltsv_record)

        # using generator to prepare for future enhancement to support
        # iterative load.
        yield data_matrix
Example #3
0
 def _to_data_matrix(self):
     """Collect the rows of the CSV reader into a list of lists.

     Rows for which ``typepy.is_not_empty_sequence`` is falsy are skipped.
     Every cell of a kept row is passed through ``self.__modify_item``.

     :raises DataError:
         Wraps any ``csv.Error`` or ``UnicodeDecodeError`` raised while
         the rows are read and converted.
     """
     matrix = []

     try:
         for record in self._csv_reader:
             if not typepy.is_not_empty_sequence(record):
                 continue

             matrix.append([self.__modify_item(cell) for cell in record])
     except (csv.Error, UnicodeDecodeError) as error:
         raise DataError(error)

     return matrix
Example #4
0
    def _get_start_row_idx(self):
        """Return the index of the first header row.

        Rows are probed in order from ``self.start_row`` up to, but not
        including, ``self._row_count``.

        :raises DataError: If no probed row is a header row.
        """
        for candidate_idx in range(self.start_row, self._row_count):
            if self.__is_header_row(candidate_idx):
                return candidate_idx

        # Reached when the range is empty or no row matched.
        raise DataError("header row not found")
Example #5
0
 def _validate_source_data(self):
     if not self._source_data:
         raise DataError("source data is empty")
Example #6
0
 def validate(self):
     """Check that a data source has been provided.

     :raises DataError:
         If ``typepy.is_null_string`` reports ``self.source`` as null.
     """
     source_is_null = typepy.is_null_string(self.source)

     if source_is_null:
         raise DataError("data source is empty")