Example #1
import pathlib

import pytest

from delphin import tsdb


def test_write(single_item_skeleton):
    dir = pathlib.Path(single_item_skeleton)
    fields = tsdb.read_schema(dir)['item']
    path = dir.joinpath('item')
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields)
    with tsdb.open(dir, 'item') as fh:
        assert list(fh) == ['0@The cat meows.\n']
    tsdb.write(dir, 'item', [(1, 'The wolf howls.')], fields, append=True)
    with tsdb.open(dir, 'item') as fh:
        assert list(fh) == ['0@The cat meows.\n', '1@The wolf howls.\n']
    # cannot append and gzip at same time
    with pytest.raises(NotImplementedError):
        tsdb.write(dir, 'item', [], fields, gzip=True, append=True)
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields, gzip=True)
    assert not path.with_suffix('').exists()
    assert path.with_suffix('.gz').exists()
    # cannot append to existing gzipped file
    with pytest.raises(NotImplementedError):
        tsdb.write(dir, 'item', [], fields, append=True)
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields)
    assert path.with_suffix('').exists()
    assert not path.with_suffix('.gz').exists()
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields, gzip=False)
    assert not path.with_suffix('.gz').exists()
    assert path.with_suffix('').exists()
    # gzip only applies to non-empty tables, so the plain file remains
    tsdb.write(dir, 'item', [], fields, gzip=True)
    assert not path.with_suffix('.gz').exists()
    assert path.with_suffix('').exists()
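For orientation, here is a minimal standalone sketch of the same write/read round trip outside of pytest. It assumes `tsdb.Field` and `tsdb.write_schema` behave as in recent PyDelphin releases; the destination directory is created on the fly.

import tempfile

from delphin import tsdb

dest = tempfile.mkdtemp()
fields = [tsdb.Field('i-id', ':integer'), tsdb.Field('i-input', ':string')]
tsdb.write_schema(dest, {'item': fields})       # create the relations file
tsdb.write(dest, 'item', [(0, 'The cat meows.')], fields)
with tsdb.open(dest, 'item') as fh:
    print(list(fh))                             # ['0@The cat meows.\n']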
Example #2
import pytest

from delphin import tsdb


def test_open(single_item_skeleton, gzipped_single_item_skeleton):
    with pytest.raises(tsdb.TSDBError):
        tsdb.open(single_item_skeleton, 'non')

    fh = tsdb.open(single_item_skeleton, 'item')
    assert not fh.closed
    with fh:
        assert list(fh) == ['0@The dog barks.\n']
    assert fh.closed

    with tsdb.open(single_item_skeleton, 'item') as fh:
        assert list(fh) == ['0@The dog barks.\n']
    with tsdb.open(gzipped_single_item_skeleton, 'item') as fh:
        assert list(fh) == ['0@The dog barks.\n']
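As the test shows, `tsdb.open` reads a table the same way whether it is plain or gzipped on disk, so callers typically pair it with `tsdb.split` and catch `TSDBError` for missing relations. A hedged sketch (the path is hypothetical):

from delphin import tsdb

try:
    with tsdb.open('path/to/testsuite', 'item') as fh:  # hypothetical path
        for line in fh:
            print(tsdb.split(line))     # e.g. ('0', 'The dog barks.')
except tsdb.TSDBError:
    print('no such relation in this test suite')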
Example #3
    def _iterslice(self, slice: slice) -> List[Row]:
        """Return a list of rows for a slice index."""
        with tsdb.open(self.dir, self.name, encoding=self.encoding) as fh:
            rows = [row for _, row in self._enum_rows(fh, slice)]
            # _enum_rows yields rows in file order; reverse for negative steps
            if slice.step is not None and slice.step < 0:
                rows = list(reversed(rows))
            return rows
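This helper backs slice access on a table, so slicing reads lazily from disk just like single-index access. A hedged usage sketch (the test suite path is hypothetical):

from delphin import itsdb

ts = itsdb.TestSuite('path/to/testsuite')   # hypothetical path
item = ts['item']
first = item[0]      # a single Row, via _getitem()
tail = item[-2:]     # a list of Rows, via _iterslice()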
Example #4
    def select(self, *names: str, cast: bool = True) -> Iterator[tsdb.Record]:
        """
        Select fields given by *names* from each row in the table.

        If no field names are given, all fields are returned.

        If *cast* is `False`, simple tuples of raw data are returned
        instead of :class:`Row` objects.

        Yields:
            Row
        Examples:
            >>> next(table.select())
            Row(10, 'unknown', 'formal', 'none', 1, 'S', 'It rained.', ...)
            >>> next(table.select('i-id'))
            Row(10)
            >>> next(table.select('i-id', 'i-input'))
            Row(10, 'It rained.')
            >>> next(table.select('i-id', 'i-input', cast=False))
            ('10', 'It rained.')
        """
        if not names:
            # no names given: select all fields, as documented above
            names = tuple(f.name for f in self.fields)
        indices = tuple(map(self._field_index.__getitem__, names))
        fields = tuple(map(self.fields.__getitem__, indices))
        field_index = tsdb.make_field_index(fields)
        with tsdb.open(self.dir, self.name, encoding=self.encoding) as fh:
            for _, row in self._enum_rows(fh):
                data = tuple(row.data[i] for i in indices)
                if cast:
                    yield Row(fields, data, field_index=field_index)
                else:
                    yield data
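A hedged end-to-end sketch of `select`, with and without casting; the test suite path is hypothetical:

from delphin import itsdb

ts = itsdb.TestSuite('path/to/testsuite')   # hypothetical path
for row in ts['item'].select('i-id', 'i-input'):
    print(row)                              # Row objects
for i_id, i_input in ts['item'].select('i-id', 'i-input', cast=False):
    print(i_id, i_input)                    # raw string tuples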
Example #5
    def __iter__(self) -> Iterator[Row]:
        # close any file handle left open by a previous, unfinished iteration
        if self._file is not None:
            self._file.close()
        fh: IO[str] = tsdb.open(self.dir, self.name, encoding=self.encoding)
        self._file = fh

        for _, row in self._enum_rows(fh):
            yield row
Example #6
from delphin import tsdb


def test_issue_285(empty_testsuite):
    # a field-final carriage return must survive the write/read round trip
    fields = tsdb.read_schema(empty_testsuite)['item']
    tsdb.write(empty_testsuite, 'item', [(0, 'The cat meows.\r')], fields)
    fh = tsdb.open(empty_testsuite, 'item')
    assert not fh.closed
    with fh:
        assert list(fh) == ['0@The cat meows.\r\n']
    assert fh.closed
Example #7
    def _sync_with_file(self) -> None:
        """Clear in-memory structures so table is synced with the file."""
        # one placeholder per line on disk; rows are loaded lazily on access
        self._rows = []
        i = -1
        with tsdb.open(self.dir, self.name, encoding=self.encoding) as lines:
            for i, _ in enumerate(lines):
                self._rows.append(None)
        self._persistent_count = i + 1
        self._volatile_index = i + 1
Example #8
    def _getitem(self, index: int) -> Row:
        """Get a single non-slice index."""
        row = self._rows[index]
        if row is None:
            # need to handle negative indices manually
            if index < 0:
                index = len(self._rows) + index
            with tsdb.open(self.dir, self.name,
                           encoding=self.encoding) as lines:
                for i, line in enumerate(lines):
                    if i == index:
                        row = Row(self.fields,
                                  tsdb.split(line),
                                  field_index=self._field_index)
                        break
        if row is None:
            raise ITSDBError('could not retrieve row {}'.format(index))
        return row
Example #9
    def process(self,
                cpu: interface.Processor,
                selector: Optional[Tuple[str, str]] = None,
                source: Optional[tsdb.Database] = None,
                fieldmapper: Optional[FieldMapper] = None,
                gzip: bool = False,
                buffer_size: int = 1000) -> None:
        """
        Process each item in a [incr tsdb()] test suite.

        The output rows will be flushed to disk when the number of new
        rows in a table reaches *buffer_size*.

        Args:
            cpu (:class:`~delphin.interface.Processor`): processor
                interface (e.g., :class:`~delphin.ace.ACEParser`)
            selector: a pair of (table_name, column_name) specifying
                the table and column used for processor input (e.g.,
                `('item', 'i-input')`)
            source (:class:`~delphin.tsdb.Database`): test suite from
                which inputs are taken; if `None`, use the current
                test suite
            fieldmapper (:class:`FieldMapper`): object for
                mapping response fields to [incr tsdb()] fields; if
                `None`, use a default mapper for the standard schema
            gzip: if `True`, compress non-empty tables with gzip
            buffer_size (int): number of output rows to hold in memory
                before flushing to disk; ignored if the test suite is all
                in-memory; if `None`, do not flush to disk
        Examples:
            >>> ts.process(ace_parser)
            >>> ts.process(ace_generator, 'result:mrs', source=ts2)
        """
        if selector is None:
            assert isinstance(cpu.task, str)
            input_table, input_column = _default_task_selectors[cpu.task]
        else:
            input_table, input_column = selector
        if (input_table not in self.schema
            or all(f.name != input_column
                   for f in self.schema[input_table])):
            raise ITSDBError('invalid table or column: {!s}, {!s}'
                             .format(input_table, input_column))
        if source is None:
            source = self
        if fieldmapper is None:
            fieldmapper = FieldMapper(source=source)
        index = tsdb.make_field_index(source.schema[input_table])

        affected = set(fieldmapper.affected_tables).intersection(self.schema)
        for name in affected:
            self[name].clear()

        key_names = [f.name for f in source.schema[input_table] if f.is_key]

        bar = None
        if not logger.isEnabledFor(logging.INFO):
            with tsdb.open(source.path, input_table) as fh:
                total = sum(1 for _ in fh)
            if total > 0:
                bar = ProgressBar('Processing', max=total)

        for row in source[input_table]:
            datum = row[index[input_column]]
            keys = [row[index[name]] for name in key_names]
            keys_dict = dict(zip(key_names, keys))
            response = cpu.process_item(datum, keys=keys_dict)

            logger.info(
                'Processed item {:>16}  {:>8} results'
                .format(tsdb.join(keys), len(response['results']))
            )
            if bar:
                bar.next()

            for tablename, data in fieldmapper.map(response):
                _add_row(self, tablename, data, buffer_size)

        for tablename, data in fieldmapper.cleanup():
            _add_row(self, tablename, data, buffer_size)

        if bar:
            bar.finish()

        tsdb.write_database(self, self.path, gzip=gzip)
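A hedged sketch of a typical call, assuming ACE is installed and that `grammar.dat` is a compiled grammar image (both illustrative):

from delphin import ace, itsdb

ts = itsdb.TestSuite('path/to/testsuite')   # hypothetical path
with ace.ACEParser('grammar.dat') as cpu:   # hypothetical grammar image
    ts.process(cpu)                         # results are written back to disk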