Exemple #1
0
def make_skeleton(path, relations, item_rows, gzip=False):
    """
    Instantiate a new profile skeleton (only the relations file and
    item file) from an existing relations file and a list of rows
    for the item table. For standard relations files, it is suggested
    to have, as a minimum, the `i-id` and `i-input` fields in the
    item rows.

    Args:
        path: the destination directory of the skeleton---must not
              already exist, as it will be created
        relations: the path to the relations file
        item_rows: the rows to use for the item file
        gzip: if True, the item file will be compressed
    Returns:
        An ItsdbProfile containing the skeleton data (but the profile
        data will already have been written to disk).
    Raises:
        ItsdbError if the destination directory already exists or
        could not be created for any other reason.
    """
    import errno
    import shutil
    try:
        os.makedirs(path)
    except OSError as ex:
        # Only claim the path exists when that is the actual cause;
        # permission problems and the like get their own message.
        if ex.errno == errno.EEXIST:
            raise ItsdbError('Path already exists: {}.'.format(path))
        raise ItsdbError(
            'Could not create directory {}: {}'.format(path, ex))
    # the relations file is copied under the profile's standard name
    shutil.copyfile(relations, os.path.join(path, _relations_filename))
    prof = ItsdbProfile(path, index=False)
    prof.write_table('item', item_rows, gzip=gzip)
    return prof
Exemple #2
0
def _write_table(profile_dir,
                 table_name,
                 rows,
                 fields,
                 append=False,
                 gzip=False):
    """
    Write `rows` to the file for `table_name` inside `profile_dir`.

    Each row is serialized with `make_row(row, fields)` and written on
    its own line.

    Args:
        profile_dir: an existing profile directory
        table_name: the name of the table; used as the file name
        rows: an iterable of rows to write
        fields: passed to `make_row` together with each row
        append: if True, add to the end of the file instead of
                overwriting it
        gzip: if True, write to `<table_name>.gz` as a gzip file;
              ignored (treated as False) when `rows` is empty
    Raises:
        ItsdbError if `profile_dir` does not exist.
    """
    # don't gzip if empty; peek at the first row, then put it back
    rows = iter(rows)
    try:
        first_row = next(rows)
    except StopIteration:
        gzip = False
    else:
        rows = chain([first_row], rows)
    if gzip and append:
        logging.warning('Appending to a gzip file may result in '
                        'inefficient compression.')

    if not os.path.exists(profile_dir):
        raise ItsdbError(
            'Profile directory does not exist: {}'.format(profile_dir))

    tbl_filename = os.path.join(profile_dir, table_name)
    mode = 'a' if append else 'w'
    if gzip:
        # text mode only from py3.3; until then use TextIOWrapper
        f = TextIOWrapper(gzopen(tbl_filename + '.gz', mode=mode))
    else:
        f = open(tbl_filename, mode=mode)

    # make sure the handle is released even if serializing or writing
    # a row fails part-way through
    try:
        for row in rows:
            f.write(make_row(row, fields) + '\n')
    finally:
        f.close()
Exemple #3
0
    def add_applicator(self, table, cols, function):
        """
        Register an applicator for `table`. When reading `table`, its
        rows will be modified by :py:func:`apply_rows`.

        Args:
            table: The table to apply the function to; must be defined
                by the relations file.
            cols: The columns in `table` to apply the function on; each
                must be a field of `table`.
            function: The applicator function.
        Raises:
            ItsdbError if `table` is unknown, `cols` is None, or any
            column is not a field of `table`.
        """
        if table not in self.relations:
            raise ItsdbError(
                'Cannot add applicator; table "{}" is not defined '
                'by the relations file.'.format(table))
        if cols is None:
            raise ItsdbError('Cannot add applicator; columns not specified.')

        # every requested column has to be a known field of the table
        known = {field.name for field in self.relations[table]}
        for name in cols:
            if name in known:
                continue
            raise ItsdbError(
                'Cannot add applicator; column "{}" not defined '
                'by the relations file.'.format(name))

        entry = (cols, function)
        self.applicators[table].append(entry)
Exemple #4
0
    def add_filter(self, table, cols, condition):
        """
        Register a filter for `table`. When reading `table`, its rows
        will be filtered by :py:func:`filter_rows`.

        Args:
            table: The table the filter applies to. May be None, in
                which case no check against the relations is made.
            cols: The columns in `table` to filter on. None is stored
                as the single-item list `[None]`.
            condition: The filter function.
        Raises:
            ItsdbError if `table` is given but is not defined by the
            relations file.
        """
        table_is_known = table is None or table in self.relations
        if not table_is_known:
            raise ItsdbError(
                'Cannot add filter; table "{}" is not defined by the '
                'relations file.'.format(table))
        # this is a hack, though perhaps well-motivated
        filter_cols = [None] if cols is None else cols
        self.filters[table].append((filter_cols, condition))
Exemple #5
0
def _open_table(tbl_filename):
    """
    Yield an open text-mode file object for a table file.

    `tbl_filename` may name either the plaintext file or its `.gz`
    counterpart; both locations are checked. If both exist, a warning
    is logged and the plaintext file is used. The file is closed when
    the generator is resumed or closed after the yield.

    NOTE(review): as written this is a plain generator that yields a
    single file object; it looks intended to be wrapped with
    `contextlib.contextmanager` --- confirm at the definition site.

    Args:
        tbl_filename: path to the table file, with or without `.gz`
    Yields:
        The opened file object.
    Raises:
        ItsdbError if neither the plaintext nor the gzipped file
        exists.
    """
    gz_ext = '.gz'
    if tbl_filename.endswith(gz_ext):
        gz_filename = tbl_filename
        tbl_filename = tbl_filename[:-len(gz_ext)]
    else:
        gz_filename = tbl_filename + gz_ext

    plain_exists = os.path.exists(tbl_filename)
    if plain_exists and os.path.exists(gz_filename):
        logging.warning(
            'Both gzipped and plaintext files were found; attempting to '
            'use the plaintext one.')

    if plain_exists:
        source = open(tbl_filename)
    elif os.path.exists(gz_filename):
        # text mode only from py3.3; until then use TextIOWrapper
        source = TextIOWrapper(BufferedReader(gzopen(gz_filename, mode='r')))
    else:
        raise ItsdbError(
            'Table does not exist at {}(.gz)'.format(tbl_filename))

    with source as f:
        yield f
Exemple #6
0
def select_rows(cols, rows, mode='list'):
    """
    Yield data selected from rows.

    It is sometimes useful to select a subset of data from a profile.
    This function selects the data in `cols` from `rows` and yields it
    in a form specified by `mode`. Possible values of `mode` (matched
    case-insensitively) are:

    ============== =================  ============================
         mode         description       example ['i-id', 'i-wf']
    ============== =================  ============================
    list (default) a list of values   ['10', '1']
    dict           col to value map   {'i-id':'10','i-wf':'1'}
    row            [incr tsdb()] row  '10@1'
    ============== =================  ============================

    Values are looked up with `row.get(col)`, so a column missing from
    a row is selected as None. Because this is a generator, nothing
    (including the check of `mode`) happens until iteration begins.

    Args:
        cols: an iterable of column names to select data for; it may
              be a one-shot iterator, as it is read only once
        rows: the rows to select column data from
        mode: the form yielded data should take

    Yields:
        Selected data in the form specified by `mode`.

    Raises:
        ItsdbError if `mode` is not one of the valid options.
    """
    mode = mode.lower()
    if mode == 'list':
        cast = lambda cols, data: data
    elif mode == 'dict':
        cast = lambda cols, data: dict(zip(cols, data))
    elif mode == 'row':
        cast = lambda cols, data: encode_row(data)
    else:
        raise ItsdbError(
            'Invalid mode for select operation: {}\n'
            '  Valid options include: list, dict, row'.format(mode))
    # The column names are reused for every row (and again by the dict
    # cast), so a one-shot iterable must be materialized up front.
    cols = list(cols)
    for row in rows:
        data = [row.get(c) for c in cols]
        yield cast(cols, data)
Exemple #7
0
 def table_relations(self, table):
     """
     Return the entry for `table` from this profile's relations.

     Args:
         table: the name of the table to look up
     Returns:
         Whatever `self.relations` stores under `table`.
     Raises:
         ItsdbError if `table` is not in the relations.
     """
     if table in self.relations:
         return self.relations[table]
     message = 'Table {} is not defined in the profiles relations.'
     raise ItsdbError(message.format(table))