def make_skeleton(path, relations, item_rows, gzip=False):
    """
    Instantiate a new profile skeleton (only the relations file and
    item file) from an existing relations file and a list of rows
    for the item table. For standard relations files, it is suggested
    to have, as a minimum, the `i-id` and `i-input` fields in the
    item rows.

    Args:
        path: the destination directory of the skeleton---must not
            already exist, as it will be created
        relations: the path to the relations file
        item_rows: the rows to use for the item file
        gzip: if True, the item file will be compressed
    Returns:
        An ItsdbProfile containing the skeleton data (but the profile
        data will already have been written to disk).
    Raises:
        ItsdbError if the destination directory could not be created,
        either because it already exists or because the operating
        system refused to create it.
    """
    import shutil

    try:
        os.makedirs(path)
    except OSError as ex:
        # Only claim the path exists when it really does; any other
        # failure (permissions, invalid name, ...) is reported with the
        # underlying OS error so the message is not misleading.
        if os.path.exists(path):
            raise ItsdbError('Path already exists: {}.'.format(path))
        raise ItsdbError(
            'Could not create path {}: {}'.format(path, ex))
    # The skeleton consists of a copy of the relations file ...
    shutil.copyfile(relations, os.path.join(path, _relations_filename))
    # ... plus the item table written through a fresh profile object.
    prof = ItsdbProfile(path, index=False)
    prof.write_table('item', item_rows, gzip=gzip)
    return prof
def _write_table(profile_dir, table_name, rows, fields,
                 append=False, gzip=False):
    """
    Write `rows` to the table file `table_name` inside `profile_dir`.

    Each row is serialized with `make_row(row, fields)` and written on
    its own line. An empty table is never gzipped, even if `gzip` is
    True; a plaintext file is written instead.

    Args:
        profile_dir: the existing profile directory to write into
        table_name: the name of the table (used as the file name)
        rows: an iterable of rows to write
        fields: the field definitions passed on to `make_row`
        append: if True, append to the table file instead of
            overwriting it
        gzip: if True (and there is at least one row), write to
            `<table_name>.gz` with gzip compression
    Raises:
        ItsdbError if `profile_dir` does not exist.
    """
    # Peek at the first row: don't gzip if empty.
    rows = iter(rows)
    try:
        first_row = next(rows)
    except StopIteration:
        gzip = False
    else:
        rows = chain([first_row], rows)
    if gzip and append:
        logging.warning('Appending to a gzip file may result in '
                        'inefficient compression.')

    if not os.path.exists(profile_dir):
        raise ItsdbError(
            'Profile directory does not exist: {}'.format(profile_dir))

    tbl_filename = os.path.join(profile_dir, table_name)
    mode = 'a' if append else 'w'
    if gzip:
        # gzip text mode only exists from py3.3; until then wrap the
        # binary gzip stream in a TextIOWrapper
        f = TextIOWrapper(gzopen(tbl_filename + '.gz', mode=mode))
    else:
        f = open(tbl_filename, mode=mode)

    # The context manager guarantees the file is flushed and closed even
    # if serializing or writing a row raises.
    with f:
        for row in rows:
            f.write(make_row(row, fields) + '\n')
def add_applicator(self, table, cols, function):
    """
    Register an applicator for `table`.

    When reading `table`, rows in `table` will be modified by
    :py:func:`apply_rows`.

    Args:
        table: The table to apply the function to.
        cols: The columns in `table` to apply the function on.
        function: The applicator function.
    Raises:
        ItsdbError if `table` is not in the relations, if `cols` is
        None, or if any column in `cols` is not a field of `table`.
    """
    relations = self.relations
    if table not in relations:
        raise ItsdbError('Cannot add applicator; table "{}" is not '
                         'defined by the relations file.'.format(table))
    if cols is None:
        raise ItsdbError('Cannot add applicator; columns not specified.')

    # Every requested column must be a field declared for this table.
    known = {field.name for field in relations[table]}
    for name in cols:
        if name in known:
            continue
        raise ItsdbError('Cannot add applicator; column "{}" not '
                         'defined by the relations file.'.format(name))

    self.applicators[table].append((cols, function))
def add_filter(self, table, cols, condition):
    """
    Register a filter for `table`.

    When reading `table`, rows in `table` will be filtered by
    :py:func:`filter_rows`.

    Args:
        table: The table the filter applies to; None is accepted
            without being checked against the relations.
        cols: The columns in `table` to filter on; None is stored
            as `[None]`.
        condition: The filter function.
    Raises:
        ItsdbError if `table` is not None and is not defined by the
        relations file.
    """
    unknown_table = table is not None and table not in self.relations
    if unknown_table:
        raise ItsdbError('Cannot add filter; table "{}" is not defined '
                         'by the relations file.'.format(table))
    # This is a hack, though perhaps well-motivated: a missing column
    # list is recorded as a one-element list holding None.
    filter_cols = [None] if cols is None else cols
    self.filters[table].append((filter_cols, condition))
def _open_table(tbl_filename):
    """
    Yield an open, readable text file for the table at `tbl_filename`.

    The table may be stored as plaintext or gzipped (`.gz` suffix), and
    `tbl_filename` may name either form. If both files exist, a warning
    is logged and the plaintext file is used. The file is closed when
    the generator is resumed or closed after the yield.

    NOTE(review): this yields exactly once, so it is presumably wrapped
    by a context-manager decorator not visible here -- confirm.

    Args:
        tbl_filename: path to the table file, with or without `.gz`
    Yields:
        A text-mode file object for the table.
    Raises:
        ItsdbError if neither the plaintext nor the gzipped file exists.
    """
    # Work out both candidate paths regardless of which one was given.
    if tbl_filename.endswith('.gz'):
        plain_path, gz_path = tbl_filename[:-3], tbl_filename
    else:
        plain_path, gz_path = tbl_filename, tbl_filename + '.gz'

    if os.path.exists(plain_path):
        if os.path.exists(gz_path):
            logging.warning(
                'Both gzipped and plaintext files were found; attempting to '
                'use the plaintext one.')
        with open(plain_path) as f:
            yield f
        return

    if not os.path.exists(gz_path):
        raise ItsdbError(
            'Table does not exist at {}(.gz)'.format(plain_path))

    # gzip text mode only exists from py3.3; until then wrap the binary
    # gzip stream in a TextIOWrapper
    with TextIOWrapper(BufferedReader(gzopen(gz_path, mode='r'))) as f:
        yield f
def select_rows(cols, rows, mode='list'):
    """
    Yield data selected from rows.

    It is sometimes useful to select a subset of data from a profile.
    This function selects the data in `cols` from `rows` and yields it
    in a form specified by `mode`. Possible values of `mode` are:

    ==============  =================  ============================
    mode            description        example `['i-id', 'i-wf']`
    ==============  =================  ============================
    list (default)  a list of values   `[10, 1]`
    dict            col to value map   `{'i-id':'10','i-wf':'1'}`
    row             [incr tsdb()] row  `'10@1'`
    ==============  =================  ============================

    Because this is a generator, nothing (including the validation of
    `mode`) happens until the first item is requested.

    Args:
        cols: an iterable of column names to select data for; it is
            consumed once, so one-shot iterators are supported
        rows: the rows to select column data from; each row must
            provide a `get(column)` method
        mode: the form yielded data should take (case-insensitive)
    Yields:
        Selected data in the form specified by `mode`. Columns that a
        row's `get()` does not find appear as None.
    Raises:
        ItsdbError if `mode` is not one of list, dict, or row.
    """
    mode = mode.lower()
    if mode not in ('list', 'dict', 'row'):
        raise ItsdbError(
            'Invalid mode for select operation: {}\n'
            ' Valid options include: list, dict, row'.format(mode))

    # Materialize the columns once: they are re-read for every row (and
    # again by zip() in dict mode), which would silently produce empty
    # results after the first pass if `cols` were a one-shot iterator.
    cols = list(cols)

    for row in rows:
        data = [row.get(c) for c in cols]
        if mode == 'list':
            yield data
        elif mode == 'dict':
            yield dict(zip(cols, data))
        else:
            yield encode_row(data)
def table_relations(self, table):
    """
    Return the relations entry for `table`.

    Args:
        table: the name of the table to look up
    Returns:
        The value stored for `table` in this object's relations.
    Raises:
        ItsdbError if `table` is not defined in the relations.
    """
    relations = self.relations
    if table in relations:
        return relations[table]
    raise ItsdbError(
        'Table {} is not defined in the profiles relations.'.format(
            table))