def build_row_filter(row_key_regex=None, column_families=None, columns=None):
    """
    Build a row filter from row keys, column families, and/or columns.

    Each supplied criterion contributes one filter: a ``RowKeyRegexFilter``
    for ``row_key_regex``, one ``ColumnRangeFilter`` per column family and one
    ``ColumnQualifierRegexFilter`` per column. Several values of the same
    criterion are wrapped in a ``RowFilterUnion``; several criteria are
    wrapped in a ``RowFilterChain``.

    Args:
        row_key_regex (:obj:`str`, optional): Regular expression for matching
            row keys. Defaults to None.
        column_families (:obj:`list` of :obj:`str`, optional): A sequence of
            column families to retrieve. Defaults to None.
        columns (:obj:`list` of :obj:`str`, optional): A sequence of column
            names or regular expressions for matching columns. Defaults to
            None.

    Returns:
        RowFilter: The built row filter from passed in parameters. If no
            parameters (or only empty ones) are given, None is returned.

    Raises:
        TypeError: If ``row_key_regex`` is not a string, or if
            ``column_families`` / ``columns`` is not a sequence.
    """
    # ``collections.Sequence`` was removed in Python 3.10; the ABCs live in
    # ``collections.abc`` on Python 3. Fall back to the old location so the
    # function keeps working on Python 2 (the file relies on ``six``).
    try:
        from collections.abc import Sequence
    except ImportError:  # Python 2
        from collections import Sequence

    if (row_key_regex is not None
            and not isinstance(row_key_regex, six.string_types)):
        raise TypeError('row_key_regex must be a str or unicode type.')
    if (column_families is not None
            and not isinstance(column_families, Sequence)):
        raise TypeError('column_families must be an iterable.')
    if columns is not None and not isinstance(columns, Sequence):
        raise TypeError('columns must be an iterable.')

    filters = []

    # Build a filter for row keys.
    if row_key_regex:
        filters.append(RowKeyRegexFilter(row_key_regex))

    # Build filters for column families.
    if column_families:
        cf_filters = [ColumnRangeFilter(cf) for cf in column_families]
        if len(cf_filters) > 1:
            filters.append(RowFilterUnion(cf_filters))
        else:
            filters.append(cf_filters[0])

    # Build filters for columns.
    if columns:
        col_filters = [ColumnQualifierRegexFilter(col) for col in columns]
        if len(col_filters) > 1:
            filters.append(RowFilterUnion(col_filters))
        else:
            filters.append(col_filters[0])

    # Nothing requested: there is no filter to return.
    if not filters:
        return None
    # A single criterion needs no chain around it.
    if len(filters) == 1:
        return filters[0]
    return RowFilterChain(filters=filters)
def _columns_filter_helper(columns):
    """Build one filter matching every column in ``columns``.

    :type columns: list
    :param columns: Iterable containing column names (as strings). Each column
                    name can be either

                      * an entire column family: ``fam`` or ``fam:``
                      * a single column: ``fam:col``

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The single filter when only one column is given, otherwise a
              union filter containing all of the matched columns.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    def _filter_for(family_id, qualifier):
        # A bare family needs only the family filter; a qualified column
        # chains the family filter with a qualifier filter.
        family_filter = FamilyNameRegexFilter(family_id)
        if qualifier is None:
            return family_filter
        qualifier_filter = ColumnQualifierRegexFilter(qualifier)
        return RowFilterChain(filters=[family_filter, qualifier_filter])

    column_filters = [
        _filter_for(family_id, qualifier)
        for family_id, qualifier in _get_column_pairs(columns)
    ]

    if not column_filters:
        raise ValueError('Must have at least one filter.')
    if len(column_filters) == 1:
        return column_filters[0]
    return RowFilterUnion(filters=column_filters)
def get_first_row(self, start_key, column_families=None, end_key=None):
    """Return the first row read from the range beginning at ``start_key``.

    ``CellsColumnLimitFilter(1)`` is always applied. When ``column_families``
    is given and non-empty, it is chained with a ``FamilyNameRegexFilter`` per
    family (wrapped in a ``RowFilterUnion`` when there are several).

    :param start_key: Inclusive start key of the row range. When ``end_key``
        is None it is also used as a required prefix of the returned row key.
    :param column_families: Optional iterable of column family names.
    :param end_key: Optional end key of the row range.
    :returns: ``(row_key, row_dict)`` for the first row read, or None when no
        row is read or (with ``end_key`` None) the first row's key does not
        start with ``start_key``.
    """
    row_filter = CellsColumnLimitFilter(1)
    if column_families is not None:
        family_filters = [FamilyNameRegexFilter(name)
                          for name in column_families]
        if family_filters:
            if len(family_filters) == 1:
                family_filter = family_filters[0]
            else:
                family_filter = RowFilterUnion(family_filters)
            row_filter = RowFilterChain(filters=[row_filter, family_filter])

    key_range = RowSet()
    key_range.add_row_range_from_keys(start_key=start_key,
                                      start_inclusive=True,
                                      end_key=end_key)

    rows = self._low_level.read_rows(filter_=row_filter, row_set=key_range)
    for row in rows:
        row_key = row.row_key.decode("utf-8")
        # Without an explicit end key the range is open-ended, so the
        # start key doubles as a prefix guard.
        if end_key is None and not row_key.startswith(start_key):
            break
        return (row_key, self.partial_row_to_dict(row))
    return None
def row_generator(self, row_keys=None, start_key=None, end_key=None,
                  column_families=None, check_prefix=None):
    """Yield ``(row_key, row_dict)`` pairs for explicit keys or a key range.

    Rows are selected either by ``row_keys`` or, when that is empty/None, by
    the inclusive range ``start_key``..``end_key``. ``CellsColumnLimitFilter(1)``
    is always applied, chained with family filters when ``column_families``
    is given and non-empty.

    :param row_keys: Optional sequence of row keys to read.
    :param start_key: Optional inclusive start key of the range to read.
    :param end_key: Optional inclusive end key of the range to read.
    :param column_families: Optional iterable of column family names.
    :param check_prefix: Optional prefix; iteration stops at the first row
        whose key does not start with it.
    :raises ValueError: (on first iteration) if neither ``row_keys`` nor
        ``start_key`` is given, or if ``start_key`` is given without
        ``end_key`` or ``check_prefix``.
    """
    if row_keys is None and start_key is None:
        raise ValueError("use row_keys or start_key parameter")
    if start_key is not None and end_key is None and check_prefix is None:
        raise ValueError(
            "use start_key together with end_key or check_prefix")

    row_filter = CellsColumnLimitFilter(1)
    if column_families is not None:
        family_filters = [FamilyNameRegexFilter(name)
                          for name in column_families]
        if family_filters:
            if len(family_filters) == 1:
                family_filter = family_filters[0]
            else:
                family_filter = RowFilterUnion(family_filters)
            row_filter = RowFilterChain(filters=[row_filter, family_filter])

    selection = RowSet()
    if row_keys:
        for key in row_keys:
            selection.add_row_key(key)
    else:
        selection.add_row_range_from_keys(start_key=start_key,
                                          end_key=end_key,
                                          start_inclusive=True,
                                          end_inclusive=True)

    rows = self._low_level.read_rows(filter_=row_filter, row_set=selection)
    for position, row in enumerate(rows):
        if row is None:
            # A missing row is reported as an empty dict under the key
            # requested at the same position.
            if row_keys:
                yield (row_keys[position], {})
            continue
        row_key = row.row_key.decode("utf-8")
        if check_prefix and not row_key.startswith(check_prefix):
            break
        yield (row_key, self.partial_row_to_ordered_dict(row))
def test_read_with_label_applied(data_table, rows_to_delete, skip_on_emulator):
    """Read one row through a union of two labeled column filters.

    Each column gets its own ``ApplyLabelFilter`` chained behind a
    ``ColumnQualifierRegexFilter``; the cells read back must carry the
    matching label while the originally written cells stay unlabeled.
    """
    from google.cloud.bigtable.row_filters import ApplyLabelFilter
    from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter
    from google.cloud.bigtable.row_filters import RowFilterChain
    from google.cloud.bigtable.row_filters import RowFilterUnion

    row = data_table.direct_row(ROW_KEY)
    rows_to_delete.append(row)

    written = _write_to_row(row, None, row, None)
    cell1, _, cell3, _ = written
    row.commit()

    # Column 1 is tagged "red".
    label1 = "label-red"
    chain1 = RowFilterChain(
        filters=[
            ColumnQualifierRegexFilter(COL_NAME1),
            ApplyLabelFilter(label1),
        ]
    )

    # Column 2 is tagged "blue".
    label2 = "label-blue"
    chain2 = RowFilterChain(
        filters=[
            ColumnQualifierRegexFilter(COL_NAME2),
            ApplyLabelFilter(label2),
        ]
    )

    # Read both labeled columns in one request.
    row_filter = RowFilterUnion(filters=[chain1, chain2])
    partial_row_data = data_table.read_row(ROW_KEY, filter_=row_filter)
    assert partial_row_data.row_key == ROW_KEY

    families = partial_row_data.cells
    family_cells = families.pop(COLUMN_FAMILY_ID1)
    # COLUMN_FAMILY_ID1 must have been the only family returned.
    assert len(families) == 0

    [cell1_new] = family_cells.pop(COL_NAME1)
    [cell3_new] = family_cells.pop(COL_NAME2)
    # COL_NAME1 and COL_NAME2 must have been the only columns returned.
    assert len(family_cells) == 0

    # cell1: same payload, label gained only on the read-back copy.
    assert cell1_new.value == cell1.value
    assert cell1_new.timestamp == cell1.timestamp
    assert cell1.labels == []
    assert cell1_new.labels == [label1]

    # cell3: same payload, label gained only on the read-back copy.
    assert cell3_new.value == cell3.value
    assert cell3_new.timestamp == cell3.timestamp
    assert cell3.labels == []
    assert cell3_new.labels == [label2]
def test_read_with_label_applied(self):
    """Read one row through a union of two labeled column filters.

    Each column gets its own ``ApplyLabelFilter`` chained behind a
    ``ColumnQualifierRegexFilter``; the cells read back must carry the
    matching label while the originally written cells stay unlabeled.
    """
    self._maybe_emulator_skip("Labels not supported by Bigtable emulator")

    row = self._table.row(ROW_KEY)
    self.rows_to_delete.append(row)

    written = self._write_to_row(row, None, row)
    cell1, _, cell3, _ = written
    row.commit()

    # Column 1 is tagged "red".
    label1 = u"label-red"
    chain1 = RowFilterChain(
        filters=[
            ColumnQualifierRegexFilter(COL_NAME1),
            ApplyLabelFilter(label1),
        ]
    )

    # Column 2 is tagged "blue".
    label2 = u"label-blue"
    chain2 = RowFilterChain(
        filters=[
            ColumnQualifierRegexFilter(COL_NAME2),
            ApplyLabelFilter(label2),
        ]
    )

    # Read both labeled columns in one request.
    row_filter = RowFilterUnion(filters=[chain1, chain2])
    partial_row_data = self._table.read_row(ROW_KEY, filter_=row_filter)
    self.assertEqual(partial_row_data.row_key, ROW_KEY)

    families = partial_row_data.cells
    family_cells = families.pop(COLUMN_FAMILY_ID1)
    # COLUMN_FAMILY_ID1 must have been the only family returned.
    self.assertEqual(len(families), 0)

    [cell1_new] = family_cells.pop(COL_NAME1)
    [cell3_new] = family_cells.pop(COL_NAME2)
    # COL_NAME1 and COL_NAME2 must have been the only columns returned.
    self.assertEqual(len(family_cells), 0)

    # cell1: same payload, label gained only on the read-back copy.
    self.assertEqual(cell1_new.value, cell1.value)
    self.assertEqual(cell1_new.timestamp, cell1.timestamp)
    self.assertEqual(cell1.labels, [])
    self.assertEqual(cell1_new.labels, [label1])

    # cell3: same payload, label gained only on the read-back copy.
    self.assertEqual(cell3_new.value, cell3.value)
    self.assertEqual(cell3_new.timestamp, cell3.timestamp)
    self.assertEqual(cell3.labels, [])
    self.assertEqual(cell3_new.labels, [label2])
def get_column_filter(
    columns: Union[Iterable[column_keys._Column], column_keys._Column] = None
) -> RowFilter:
    """Generate a RowFilter that accepts the specified columns.

    Each column is matched with a ``ColumnRangeFilter`` on its ``family_id``
    whose start and end column are both the column's ``key``.

    :param columns: A single ``column_keys._Column`` or any iterable of them
        (lists, tuples, sets and generators are all accepted; the iterable is
        consumed exactly once).
    :return: The single ``ColumnRangeFilter`` when exactly one column is
        given, otherwise a ``RowFilterUnion`` of one filter per column.
    :raises TypeError: If ``columns`` is None or otherwise neither a
        ``_Column`` nor an iterable.
    """

    def _exact_column_filter(column):
        # Start and end are the same key, so the range names one column.
        return ColumnRangeFilter(
            column.family_id, start_column=column.key, end_column=column.key
        )

    if isinstance(columns, column_keys._Column):
        return _exact_column_filter(columns)

    # Materialize first: the annotation promises ``Iterable``, which does not
    # guarantee ``len()`` or indexing (e.g. generators, sets).
    column_filters = [_exact_column_filter(column) for column in columns]
    if len(column_filters) == 1:
        return column_filters[0]
    return RowFilterUnion(column_filters)
def read_row(self, row_id, column_families=None):
    """Read a single row and return it as a dict.

    ``CellsColumnLimitFilter(1)`` is always applied. When ``column_families``
    is given and non-empty, it is chained with a ``FamilyNameRegexFilter`` per
    family (wrapped in a ``RowFilterUnion`` when there are several).

    :param row_id: Row key as text; it is UTF-8 encoded before the read.
    :param column_families: Optional iterable of column family names.
    :returns: The result of ``partial_row_to_dict`` for the row read.
    :raises KeyError: If the low-level read returns None for ``row_id``.
    """
    row_filter = CellsColumnLimitFilter(1)
    if column_families is not None:
        family_filters = [FamilyNameRegexFilter(name)
                          for name in column_families]
        if family_filters:
            if len(family_filters) == 1:
                family_filter = family_filters[0]
            else:
                family_filter = RowFilterUnion(family_filters)
            row_filter = RowFilterChain(filters=[row_filter, family_filter])

    encoded_key = row_id.encode("utf-8")
    row = self._low_level.read_row(encoded_key, filter_=row_filter)
    if row is None:
        raise KeyError("row {} not found".format(row_id))
    return self.partial_row_to_dict(row)
def _row_keys_filter_helper(row_keys): """Creates a union filter for a list of rows. :type row_keys: list :param row_keys: Iterable containing row keys (as strings). :rtype: :class:`~google.cloud.bigtable.row.RowFilter` :returns: The union filter created containing all of the row keys. :raises: :class:`ValueError <exceptions.ValueError>` if there are no filters to union. """ filters = [] for row_key in row_keys: filters.append(RowKeyRegexFilter(row_key)) num_filters = len(filters) if num_filters == 0: raise ValueError('Must have at least one filter.') elif num_filters == 1: return filters[0] else: return RowFilterUnion(filters=filters)