def test_write(single_item_skeleton):
    dir = pathlib.Path(single_item_skeleton)
    fields = tsdb.read_schema(dir)['item']
    path = dir.joinpath('item')
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields)
    with tsdb.open(dir, 'item') as fh:
        assert list(fh) == ['0@The cat meows.\n']
    tsdb.write(dir, 'item', [(1, 'The wolf howls.')], fields, append=True)
    with tsdb.open(dir, 'item') as fh:
        assert list(fh) == ['0@The cat meows.\n', '1@The wolf howls.\n']
    # cannot append and gzip at the same time
    with pytest.raises(NotImplementedError):
        tsdb.write(dir, 'item', [], fields, gzip=True, append=True)
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields, gzip=True)
    assert not path.with_suffix('').exists()
    assert path.with_suffix('.gz').exists()
    # cannot append to an existing gzipped file
    with pytest.raises(NotImplementedError):
        tsdb.write(dir, 'item', [], fields, append=True)
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields)
    assert path.with_suffix('').exists()
    assert not path.with_suffix('.gz').exists()
    tsdb.write(dir, 'item', [(0, 'The cat meows.')], fields, gzip=False)
    assert not path.with_suffix('.gz').exists()
    assert path.with_suffix('').exists()
    # gzip is ignored when there are no records to write
    tsdb.write(dir, 'item', [], fields, gzip=True)
    assert not path.with_suffix('.gz').exists()
    assert path.with_suffix('').exists()
def test_open(single_item_skeleton, gzipped_single_item_skeleton):
    with pytest.raises(tsdb.TSDBError):
        tsdb.open(single_item_skeleton, 'non')
    fh = tsdb.open(single_item_skeleton, 'item')
    assert not fh.closed
    with fh:
        assert list(fh) == ['0@The dog barks.\n']
    assert fh.closed
    with tsdb.open(single_item_skeleton, 'item') as fh:
        assert list(fh) == ['0@The dog barks.\n']
    with tsdb.open(gzipped_single_item_skeleton, 'item') as fh:
        assert list(fh) == ['0@The dog barks.\n']
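# Note: `single_item_skeleton` and `gzipped_single_item_skeleton` are pytest
# fixtures defined elsewhere (e.g., in conftest.py). A minimal sketch of the
# plain-text fixture, inferred from the assertions above, might look like
# the following; the exact schema and row contents are assumptions, not the
# project's actual fixture:
@pytest.fixture
def single_item_skeleton(tmp_path):
    dir = tmp_path.joinpath('skeleton')
    dir.mkdir()
    # a two-field 'item' relation, matching the '0@...' rows asserted above
    dir.joinpath('relations').write_text(
        'item:\n'
        '  i-id :integer :key\n'
        '  i-input :string\n'
    )
    dir.joinpath('item').write_text('0@The dog barks.\n')
    return str(dir)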
def _iterslice(self, slice: slice) -> List[Row]:
    """Return a list of rows from a slice index."""
    with tsdb.open(self.dir, self.name, encoding=self.encoding) as fh:
        rows = [row for _, row in self._enum_rows(fh, slice)]
        if slice.step is not None and slice.step < 0:
            # the file is read in order, so rows selected by a negative
            # step must be reversed afterwards
            rows = list(reversed(rows))
        return rows
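# Usage sketch for the slicing behavior above (assuming a file-backed Table
# instance named `table`): slices mirror list semantics, with rows read in
# file order and reversed afterwards when the step is negative.
#
#     table[0:2]    # first two rows
#     table[::-1]   # all rows, last to first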
def select(self, *names: str, cast: bool = True) -> Iterator[tsdb.Record]:
    """
    Select fields given by *names* from each row in the table.

    If no field names are given, all fields are returned.

    If *cast* is `False`, simple tuples of raw data are returned
    instead of :class:`Row` objects.

    Yields:
        Row

    Examples:
        >>> next(table.select())
        Row(10, 'unknown', 'formal', 'none', 1, 'S', 'It rained.', ...)
        >>> next(table.select('i-id'))
        Row(10)
        >>> next(table.select('i-id', 'i-input'))
        Row(10, 'It rained.')
        >>> next(table.select('i-id', 'i-input', cast=False))
        ('10', 'It rained.')
    """
    if not names:
        # with no names given, select all fields (as documented above)
        names = tuple(field.name for field in self.fields)
    indices = tuple(map(self._field_index.__getitem__, names))
    fields = tuple(map(self.fields.__getitem__, indices))
    field_index = tsdb.make_field_index(fields)
    with tsdb.open(self.dir, self.name, encoding=self.encoding) as fh:
        for _, row in self._enum_rows(fh):
            data = tuple(row.data[i] for i in indices)
            if cast:
                yield Row(fields, data, field_index=field_index)
            else:
                yield data
def __iter__(self) -> Iterator[Row]:
    if self._file is not None:
        self._file.close()
    fh: IO[str] = tsdb.open(self.dir, self.name, encoding=self.encoding)
    self._file = fh
    for _, row in self._enum_rows(fh):
        yield row
def test_issue_285(empty_testsuite):
    # embedded carriage returns must survive a write/read round trip
    fields = tsdb.read_schema(empty_testsuite)['item']
    tsdb.write(empty_testsuite, 'item', [(0, 'The cat meows.\r')], fields)
    fh = tsdb.open(empty_testsuite, 'item')
    assert not fh.closed
    with fh:
        assert list(fh) == ['0@The cat meows.\r\n']
    assert fh.closed
def _sync_with_file(self) -> None:
    """Clear in-memory structures so table is synced with the file."""
    self._rows = []
    i = -1
    with tsdb.open(self.dir, self.name, encoding=self.encoding) as lines:
        for i, _ in enumerate(lines):
            # placeholder rows; actual rows are parsed lazily on access
            self._rows.append(None)
    self._persistent_count = i + 1
    self._volatile_index = i + 1
def _getitem(self, index: int) -> Row:
    """Get a single non-slice index."""
    row = self._rows[index]
    if row is None:
        # need to handle negative indices manually
        if index < 0:
            index = len(self._rows) + index
        with tsdb.open(self.dir, self.name, encoding=self.encoding) as lines:
            for i, line in enumerate(lines):
                if i == index:
                    row = Row(self.fields,
                              tsdb.split(line),
                              field_index=self._field_index)
                    break
    if row is None:
        raise ITSDBError('could not retrieve row {}'.format(index))
    return row
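# Usage sketch (names assumed, not part of this module): because cached rows
# may be placeholders, indexing a file-backed table parses rows from disk on
# demand rather than loading the whole file:
#
#     table = ts['item']   # a Table in a file-backed TestSuite
#     table[0]             # read and parsed from disk on first access
#     table[-1]            # negative indices resolve against the row count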
def process(self,
            cpu: interface.Processor,
            selector: Tuple[str, str] = None,
            source: tsdb.Database = None,
            fieldmapper: FieldMapper = None,
            gzip: bool = False,
            buffer_size: int = 1000) -> None:
    """
    Process each item in a [incr tsdb()] test suite.

    The output rows will be flushed to disk when the number of new
    rows in a table is *buffer_size*.

    Args:
        cpu (:class:`~delphin.interface.Processor`): processor
            interface (e.g., :class:`~delphin.ace.ACEParser`)
        selector: a pair of (table_name, column_name) that specify
            the table and column used for processor input (e.g.,
            `('item', 'i-input')`)
        source (:class:`~delphin.tsdb.Database`): test suite from
            which inputs are taken; if `None`, use the current test
            suite
        fieldmapper (:class:`FieldMapper`): object for mapping
            response fields to [incr tsdb()] fields; if `None`, use
            a default mapper for the standard schema
        gzip: if `True`, compress non-empty tables with gzip
        buffer_size (int): number of output rows to hold in memory
            before flushing to disk; ignored if the test suite is all
            in-memory; if `None`, do not flush to disk

    Examples:
        >>> ts.process(ace_parser)
        >>> ts.process(ace_generator, 'result:mrs', source=ts2)
    """
    if selector is None:
        assert isinstance(cpu.task, str)
        input_table, input_column = _default_task_selectors[cpu.task]
    else:
        input_table, input_column = selector
    if (input_table not in self.schema
            or all(f.name != input_column
                   for f in self.schema[input_table])):
        raise ITSDBError('invalid table or column: {!s}, {!s}'
                         .format(input_table, input_column))
    if source is None:
        source = self
    if fieldmapper is None:
        fieldmapper = FieldMapper(source=source)
    index = tsdb.make_field_index(source.schema[input_table])

    affected = set(fieldmapper.affected_tables).intersection(self.schema)
    for name in affected:
        self[name].clear()

    key_names = [f.name for f in source.schema[input_table] if f.is_key]

    bar = None
    if not logger.isEnabledFor(logging.INFO):
        with tsdb.open(source.path, input_table) as fh:
            total = sum(1 for _ in fh)
        if total > 0:
            bar = ProgressBar('Processing', max=total)

    for row in source[input_table]:
        datum = row[index[input_column]]
        keys = [row[index[name]] for name in key_names]
        keys_dict = dict(zip(key_names, keys))
        response = cpu.process_item(datum, keys=keys_dict)
        logger.info(
            'Processed item {:>16} {:>8} results'
            .format(tsdb.join(keys), len(response['results']))
        )
        if bar:
            bar.next()
        for tablename, data in fieldmapper.map(response):
            _add_row(self, tablename, data, buffer_size)

    for tablename, data in fieldmapper.cleanup():
        _add_row(self, tablename, data, buffer_size)

    if bar:
        bar.finish()

    tsdb.write_database(self, self.path, gzip=gzip)
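# Usage sketch for `process` (the paths are placeholders; assumes the ACE
# binary and a compiled grammar image are available):
#
#     from delphin import ace, itsdb
#     ts = itsdb.TestSuite('path/to/testsuite')
#     with ace.ACEParser('path/to/grammar.dat') as cpu:
#         ts.process(cpu, buffer_size=500)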