def __init__(self,
             dir: util.PathLike,
             name: str,
             fields: tsdb.Fields,
             encoding: str = 'utf-8') -> None:
    self.dir = Path(dir).expanduser()
    self.name = name
    self.fields: Sequence[tsdb.Field] = fields
    self._field_index = tsdb.make_field_index(fields)
    self.encoding = encoding
    try:
        tsdb.get_path(self.dir, name)
    except tsdb.TSDBError:
        # file didn't exist as plain-text or gzipped, so create it
        path = self.dir.joinpath(name)
        path.write_text('')
    self._rows: List[Optional[Row]] = []
    # storing the open file for __iter__ lets Table.close() work
    self._file: Optional[IO[str]] = None
    # These two numbers are needed to track whether changes to the
    # table are only additions or whether they remove/alter existing
    # rows. The first is the number of rows in the file and the
    # second is the index of the first unwritten row.
    self._persistent_count = 0
    self._volatile_index = 0
    self._sync_with_file()
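# A minimal usage sketch of constructing a Table. The two-field schema and
# the 'item' filename below are illustrative assumptions, not part of this
# module; per __init__ above, a missing 'item' file is created empty:
#
#     >>> from delphin import tsdb
#     >>> fields = [tsdb.Field('i-id', ':integer', [':key']),
#     ...           tsdb.Field('i-input', ':string')]
#     >>> table = Table('path/to/profile', 'item', fields)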
def select(self, *names: str, cast: bool = True) -> Iterator[tsdb.Record]:
    """
    Select fields given by *names* from each row in the table.

    If no field names are given, all fields are returned.

    If *cast* is `False`, simple tuples of raw data are returned
    instead of :class:`Row` objects.

    Yields:
        Row

    Examples:
        >>> next(table.select())
        Row(10, 'unknown', 'formal', 'none', 1, 'S', 'It rained.', ...)
        >>> next(table.select('i-id'))
        Row(10)
        >>> next(table.select('i-id', 'i-input'))
        Row(10, 'It rained.')
        >>> next(table.select('i-id', 'i-input', cast=False))
        ('10', 'It rained.')
    """
    if not names:
        # no field names were given, so select all fields
        names = tuple(f.name for f in self.fields)
    indices = tuple(map(self._field_index.__getitem__, names))
    fields = tuple(map(self.fields.__getitem__, indices))
    field_index = tsdb.make_field_index(fields)
    with tsdb.open(self.dir, self.name, encoding=self.encoding) as fh:
        for _, row in self._enum_rows(fh):
            data = tuple(row.data[i] for i in indices)
            if cast:
                yield Row(fields, data, field_index=field_index)
            else:
                yield data
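# A further usage sketch, beyond the docstring examples: collecting two
# columns into a mapping using raw (uncast) tuples, which avoids Row
# construction for each record ('table' is an assumed Table over an 'item'
# schema with 'i-id' and 'i-input' fields):
#
#     >>> by_id = {data[0]: data[1]
#     ...          for data in table.select('i-id', 'i-input', cast=False)}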
def __init__(self,
             fields: tsdb.Fields,
             data: Sequence[tsdb.Value],
             field_index: Optional[tsdb.FieldIndex] = None):
    if len(data) != len(fields):
        raise ITSDBError(
            'number of columns ({}) != number of fields ({})'.format(
                len(data), len(fields)))
    if field_index is None:
        field_index = tsdb.make_field_index(fields)
    self.fields = fields
    # normalize each value to its string form for the field's datatype
    self.data = tuple(tsdb.format(f.datatype, val)
                      for f, val in zip(fields, data))
    self._field_index = field_index
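# A brief sketch of the normalization above (the field definitions are
# illustrative assumptions; tsdb.format renders each value in its TSDB
# string form, so the integer 10 is stored as '10', matching the raw data
# shown in the select() docstring):
#
#     >>> fields = [tsdb.Field('i-id', ':integer', [':key']),
#     ...           tsdb.Field('i-input', ':string')]
#     >>> Row(fields, [10, 'It rained.']).data
#     ('10', 'It rained.')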
def process(self,
            cpu: interface.Processor,
            selector: Optional[Tuple[str, str]] = None,
            source: Optional[tsdb.Database] = None,
            fieldmapper: Optional[FieldMapper] = None,
            gzip: bool = False,
            buffer_size: Optional[int] = 1000) -> None:
    """
    Process each item in a [incr tsdb()] test suite.

    The output rows are flushed to disk when the number of new rows
    in a table reaches *buffer_size*.

    Args:
        cpu (:class:`~delphin.interface.Processor`): processor
            interface (e.g., :class:`~delphin.ace.ACEParser`)
        selector: a pair of (table_name, column_name) that specifies
            the table and column used for processor input (e.g.,
            `('item', 'i-input')`)
        source (:class:`TestSuite`, :class:`Table`): test suite or
            table from which inputs are taken; if `None`, use the
            current test suite
        fieldmapper (:class:`FieldMapper`): object for mapping
            response fields to [incr tsdb()] fields; if `None`, use
            a default mapper for the standard schema
        gzip: if `True`, compress non-empty tables with gzip
        buffer_size (int): number of output rows to hold in memory
            before flushing to disk; ignored if the test suite is
            all in-memory; if `None`, do not flush to disk

    Examples:
        >>> ts.process(ace_parser)
        >>> ts.process(ace_generator, ('result', 'mrs'), source=ts2)
    """
    if selector is None:
        assert isinstance(cpu.task, str)
        input_table, input_column = _default_task_selectors[cpu.task]
    else:
        input_table, input_column = selector
    if (input_table not in self.schema
            or all(f.name != input_column
                   for f in self.schema[input_table])):
        raise ITSDBError('invalid table or column: {!s}, {!s}'.format(
            input_table, input_column))

    if source is None:
        source = self
    if fieldmapper is None:
        fieldmapper = FieldMapper()

    index = tsdb.make_field_index(source.schema[input_table])

    # clear any tables that processing will repopulate
    affected = set(fieldmapper.affected_tables).intersection(self.schema)
    for name in affected:
        self[name].clear()

    key_names = [f.name for f in source.schema[input_table] if f.is_key]

    for row in source[input_table]:
        datum = row[index[input_column]]
        keys = [row[index[name]] for name in key_names]
        keys_dict = dict(zip(key_names, keys))
        response = cpu.process_item(datum, keys=keys_dict)
        logger.info('Processed item {:>16} {:>8} results'.format(
            tsdb.join(keys), len(response['results'])))
        for tablename, data in fieldmapper.map(response):
            _add_row(self, tablename, data, buffer_size)

    for tablename, data in fieldmapper.cleanup():
        _add_row(self, tablename, data, buffer_size)

    tsdb.write_database(self, self.path, gzip=gzip)
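# A usage sketch tying this together with ACE (the profile and grammar
# paths are illustrative assumptions; delphin.ace.ACEParser supports use
# as a context manager, which ensures the subprocess is closed):
#
#     >>> from delphin import ace, itsdb
#     >>> ts = itsdb.TestSuite('path/to/profile')
#     >>> with ace.ACEParser('grammar.dat') as cpu:
#     ...     ts.process(cpu)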