Example 1
    def __init__(self,
                 dir: util.PathLike,
                 name: str,
                 fields: tsdb.Fields,
                 encoding: str = 'utf-8') -> None:
        self.dir = Path(dir).expanduser()
        self.name = name
        self.fields: Sequence[tsdb.Field] = fields
        self._field_index = tsdb.make_field_index(fields)
        self.encoding = encoding
        try:
            tsdb.get_path(self.dir, name)
        except tsdb.TSDBError:
            # file didn't exist as plain-text or gzipped, so create it
            path = self.dir.joinpath(name)
            path.write_text('')
        self._rows: List[Optional[Row]] = []
        # storing the open file for __iter__ lets Table.close() work
        self._file: Optional[IO[str]] = None

        # These two numbers are needed to track if changes to the
        # table are only additions or if they remove/alter existing
        # rows. The first is the number of rows in the file and the
        # second is the index of the first unwritten row
        self._persistent_count = 0
        self._volatile_index = 0

        self._sync_with_file()
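
A minimal construction sketch (not canonical usage: the directory path and two-field schema are illustrative, and it assumes `tsdb.Field` takes a name, a datatype, and an optional flags list):

    from pathlib import Path
    from delphin import tsdb, itsdb

    # illustrative two-column schema for an 'item' table; ':key' flags the key field
    fields = [
        tsdb.Field('i-id', ':integer', [':key']),
        tsdb.Field('i-input', ':string'),
    ]
    Path('/tmp/ts0').mkdir(exist_ok=True)  # the directory itself must already exist
    table = itsdb.Table('/tmp/ts0', 'item', fields)  # creates an empty 'item' file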
Example 2
    def select(self, *names: str, cast: bool = True) -> Iterator[tsdb.Record]:
        """
        Select fields given by *names* from each row in the table.

        If no field names are given, all fields are returned.

        If *cast* is `False`, simple tuples of raw data are returned
        instead of :class:`Row` objects.

        Yields:
            Row
        Examples:
            >>> next(table.select())
            Row(10, 'unknown', 'formal', 'none', 1, 'S', 'It rained.', ...)
            >>> next(table.select('i-id'))
            Row(10)
            >>> next(table.select('i-id', 'i-input'))
            Row(10, 'It rained.')
            >>> next(table.select('i-id', 'i-input', cast=False))
            ('10', 'It rained.')
        """
        if not names:
            # per the docstring, no names means all fields are selected
            names = tuple(f.name for f in self.fields)
        indices = tuple(map(self._field_index.__getitem__, names))
        fields = tuple(map(self.fields.__getitem__, indices))
        field_index = tsdb.make_field_index(fields)
        with tsdb.open(self.dir, self.name, encoding=self.encoding) as fh:
            for _, row in self._enum_rows(fh):
                data = tuple(row.data[i] for i in indices)
                if cast:
                    yield Row(fields, data, field_index=field_index)
                else:
                    yield data
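
A short usage sketch for the method above (assuming an existing profile on disk at an illustrative path, accessed through a `TestSuite` indexed by table name):

    from delphin import itsdb

    ts = itsdb.TestSuite('profiles/mrs')  # illustrative path to an existing profile
    # cast=True (the default) yields Row objects; cast=False yields raw string tuples
    for i_id, i_input in ts['item'].select('i-id', 'i-input', cast=False):
        print(i_id, i_input)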
Example 3
    def __init__(self,
                 fields: tsdb.Fields,
                 data: Sequence[tsdb.Value],
                 field_index: tsdb.FieldIndex = None):
        if len(data) != len(fields):
            raise ITSDBError(
                'number of columns ({}) != number of fields ({})'.format(
                    len(data), len(fields)))
        if field_index is None:
            field_index = tsdb.make_field_index(fields)
        self.fields = fields
        self.data = tuple(
            tsdb.format(f.datatype, val) for f, val in zip(fields, data))
        self._field_index = field_index
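
To see the effect of the `tsdb.format` call above, a small sketch (the two-field schema is illustrative, and it assumes `tsdb.Field` can be built from just a name and a datatype):

    from delphin import tsdb
    from delphin.itsdb import Row

    fields = [tsdb.Field('i-id', ':integer'), tsdb.Field('i-input', ':string')]
    row = Row(fields, [10, 'It rained.'])
    # values are normalized to their raw string form on construction
    assert row.data == ('10', 'It rained.')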
Example 4
    def process(self,
                cpu: interface.Processor,
                selector: Tuple[str, str] = None,
                source: tsdb.Database = None,
                fieldmapper: FieldMapper = None,
                gzip: bool = False,
                buffer_size: int = 1000) -> None:
        """
        Process each item in a [incr tsdb()] test suite.

        The output rows will be flushed to disk when the number of new
        rows in a table reaches *buffer_size*.

        Args:
            cpu (:class:`~delphin.interface.Processor`): processor
                interface (e.g., :class:`~delphin.ace.ACEParser`)
            selector: a pair of (table_name, column_name) that specify
                the table and column used for processor input (e.g.,
                `('item', 'i-input')`)
            source (:class:`TestSuite`, :class:`Table`): test suite or
                table from which inputs are taken; if `None`, use the
                current test suite
            fieldmapper (:class:`FieldMapper`): object for
                mapping response fields to [incr tsdb()] fields; if
                `None`, use a default mapper for the standard schema
            gzip: if `True`, compress non-empty tables with gzip
            buffer_size (int): number of output rows to hold in memory
                before flushing to disk; ignored if the test suite is all
                in-memory; if `None`, do not flush to disk
        Examples:
            >>> ts.process(ace_parser)
            >>> ts.process(ace_generator, ('result', 'mrs'), source=ts2)
        """
        if selector is None:
            assert isinstance(cpu.task, str)
            input_table, input_column = _default_task_selectors[cpu.task]
        else:
            input_table, input_column = selector
        if (input_table not in self.schema
                or all(f.name != input_column
                       for f in self.schema[input_table])):
            raise ITSDBError('invalid table or column: {!s}, {!s}'.format(
                input_table, input_column))
        if source is None:
            source = self
        if fieldmapper is None:
            fieldmapper = FieldMapper()
        index = tsdb.make_field_index(source.schema[input_table])

        affected = set(fieldmapper.affected_tables).intersection(self.schema)
        for name in affected:
            self[name].clear()

        key_names = [f.name for f in source.schema[input_table] if f.is_key]

        for row in source[input_table]:
            datum = row[index[input_column]]
            keys = [row[index[name]] for name in key_names]
            keys_dict = dict(zip(key_names, keys))
            response = cpu.process_item(datum, keys=keys_dict)
            logger.info('Processed item {:>16}  {:>8} results'.format(
                tsdb.join(keys), len(response['results'])))
            for tablename, data in fieldmapper.map(response):
                _add_row(self, tablename, data, buffer_size)

        for tablename, data in fieldmapper.cleanup():
            _add_row(self, tablename, data, buffer_size)

        tsdb.write_database(self, self.path, gzip=gzip)
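
Finally, a sketch of driving `process` end to end with ACE (the grammar and profile paths are illustrative):

    from delphin import ace, itsdb

    ts = itsdb.TestSuite('profiles/mrs')  # illustrative profile path
    # ACEParser sets cpu.task to 'parse', so the default selector applies
    with ace.ACEParser('grammar.dat') as cpu:  # illustrative grammar image
        ts.process(cpu, buffer_size=500)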