def get_filtered_search_results(self, search_type, search_query,
                                search_filter, **kwargs):
    '''
    Run a faceted (filtered) search.

    search_type = one of ['locations', 'servers', 'clients']
    search_query = input query from api
    search_filter = {
        type: ['locations', 'servers', 'clients'],
        value: [id1, id2]
    }
    **kwargs = forwarded unchanged to every `bt.scan_table` call.

    Returns an empty list when the filter has no type or when the filter
    type equals `search_type`. Otherwise returns whatever
    `prepare_filtered_search_results` builds from the rows that survive
    `filter_results`.
    '''
    filter_type = search_filter['type']
    if not filter_type or filter_type == search_type:
        return []

    table_name = self.get_table_name(search_type, search_filter)
    table_config = get_table_config(self.table_configs, None, table_name)

    scanned_rows = []
    # One prefix scan per filter value, visited in ascending order.
    for facet_value in sorted(search_filter['value']):
        # The filter value is always the first component of the row key.
        prefix = du.get_key_field(facet_value, 0, table_config)
        prefix += du.BIGTABLE_KEY_DELIM

        # Only the `meta` column family is read - for speed.
        meta_only = FamilyNameRegexFilter('meta')
        scanned_rows += bt.scan_table(table_config,
                                      self.get_pool(),
                                      prefix=prefix,
                                      filter=meta_only,
                                      **kwargs)

    matches = self.filter_results(search_type, search_query, scanned_rows)
    return self.prepare_filtered_search_results(matches)
def _columns_filter_helper(columns):
    """Build one filter that matches any of the given columns.

    :type columns: list
    :param columns: Iterable containing column names (as strings). Each column
                    name can be either

                      * an entire column family: ``fam`` or ``fam:``
                      * a single column: ``fam:col``

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: A single filter when only one column was given, otherwise a
              union over the per-column filters.
    :raises: :class:`ValueError <exceptions.ValueError>` if ``columns``
             produced no filters at all.
    """
    union_members = []
    for family_id, qualifier in _get_column_pairs(columns):
        family_filter = FamilyNameRegexFilter(family_id)
        if qualifier is None:
            # Whole-family selection: the family filter is enough.
            union_members.append(family_filter)
            continue
        # Single column: family AND qualifier must both match.
        qualifier_filter = ColumnQualifierRegexFilter(qualifier)
        union_members.append(
            RowFilterChain(filters=[family_filter, qualifier_filter]))

    member_count = len(union_members)
    if member_count == 0:
        raise ValueError('Must have at least one filter.')
    if member_count == 1:
        # No need to wrap a lone filter in a union.
        return union_members[0]
    return RowFilterUnion(filters=union_members)
def get_first_row(self, start_key, column_families=None, end_key=None):
    """Return the first row of the range starting at ``start_key``.

    :param start_key: inclusive start of the row range; when ``end_key`` is
        None it is also used as a required prefix of the returned row key.
    :param column_families: optional iterable of column family names to
        restrict the read to.
    :param end_key: optional end of the row range.
    :returns: ``(row_key, row_dict)`` where ``row_key`` is the UTF-8 decoded
        key and ``row_dict`` comes from ``partial_row_to_dict``; ``None``
        when the read yields no row, or when ``end_key`` is None and the
        first row's key does not start with ``start_key``.
    """
    filters = [CellsColumnLimitFilter(1)]
    if column_families is not None:
        family_filters = [FamilyNameRegexFilter(name)
                          for name in column_families]
        if len(family_filters) == 1:
            filters.append(family_filters[0])
        elif len(family_filters) > 1:
            filters.append(RowFilterUnion(family_filters))

    # A chain is only needed when a family restriction was added.
    filter_ = RowFilterChain(filters=filters) if len(filters) > 1 else filters[0]

    row_set = RowSet()
    row_set.add_row_range_from_keys(start_key=start_key,
                                    start_inclusive=True,
                                    end_key=end_key)
    rows = self._low_level.read_rows(filter_=filter_, row_set=row_set)

    # Only the first yielded row is ever inspected: the loop body always
    # leaves the loop, either by `break` or by `return`.
    for first in rows:
        decoded_key = first.row_key.decode("utf-8")
        if end_key is None and not decoded_key.startswith(start_key):
            break
        return (decoded_key, self.partial_row_to_dict(first))
    return None
def test_single_column(self):
    # A lone column-family name must come back as a bare family filter,
    # not wrapped in a union.
    from google.cloud.bigtable.row_filters import FamilyNameRegexFilter

    family = "cf1"
    actual = self._call_fut([family])
    self.assertEqual(actual, FamilyNameRegexFilter(family))
def row_generator(self, row_keys=None, start_key=None, end_key=None,
                  column_families=None, check_prefix=None):
    """Yield ``(row_key, row_dict)`` pairs for a set of keys or a key range.

    Either ``row_keys`` or ``start_key`` must be supplied. When ``row_keys``
    is non-empty it takes precedence and the range arguments are not used to
    build the row set.

    Because this is a generator function, the argument validation below runs
    on the first iteration, not when the function is called.

    :param row_keys: optional indexable sequence of row keys to read.
    :param start_key: optional inclusive start of a row range; must be
        combined with ``end_key`` or ``check_prefix``.
    :param end_key: optional inclusive end of the row range.
    :param column_families: optional iterable of column family names to
        restrict the read to.
    :param check_prefix: optional string; iteration stops at the first row
        whose decoded key does not start with it.
    :returns: generator of ``(row_key, row_dict)``; ``row_key`` is the UTF-8
        decoded key and ``row_dict`` comes from
        ``partial_row_to_ordered_dict``.
    :raises ValueError: if neither ``row_keys`` nor ``start_key`` is given,
        or if ``start_key`` is given without ``end_key`` or ``check_prefix``.
    """
    if row_keys is None and start_key is None:
        raise ValueError("use row_keys or start_key parameter")
    if start_key is not None and (end_key is None and check_prefix is None):
        raise ValueError(
            "use start_key together with end_key or check_prefix")

    if not row_keys and start_key is None:
        # An empty (but not None) row_keys passes the validation above.
        # Without this guard it would fall through to the range branch with
        # start_key=None and end_key=None, i.e. an unbounded range. No keys
        # requested means no rows.
        return

    filters = [CellsColumnLimitFilter(1)]
    if column_families is not None:
        family_filters = [FamilyNameRegexFilter(name)
                          for name in column_families]
        if len(family_filters) == 1:
            filters.append(family_filters[0])
        elif len(family_filters) > 1:
            filters.append(RowFilterUnion(family_filters))

    # A chain is only needed when a family restriction was added.
    if len(filters) > 1:
        filter_ = RowFilterChain(filters=filters)
    else:
        filter_ = filters[0]

    row_set = RowSet()
    if row_keys:
        for key in row_keys:
            row_set.add_row_key(key)
    else:
        row_set.add_row_range_from_keys(start_key=start_key,
                                        end_key=end_key,
                                        start_inclusive=True,
                                        end_inclusive=True)

    generator = self._low_level.read_rows(filter_=filter_, row_set=row_set)

    for position, rowdata in enumerate(generator):
        if rowdata is None:
            # NOTE(review): assumes the reader yields one entry per requested
            # key, in request order, so `position` indexes row_keys - confirm.
            if row_keys:
                yield (row_keys[position], {})
            continue
        rk = rowdata.row_key.decode("utf-8")
        if check_prefix and not rk.startswith(check_prefix):
            # Stop at the first key outside the prefix.
            break
        yield (rk, self.partial_row_to_ordered_dict(rowdata))
def _filter_chain_helper(column=None, versions=None, timestamp=None,
                         filters=None):
    """Combine the requested restrictions into a single row filter.

    :type column: str
    :param column: (Optional) The column (``fam:col``) to be selected
                   with the filter. Bytes are decoded as UTF-8 first.

    :type versions: int
    :param versions: (Optional) The maximum number of cells to return.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If specified, only cells returned before (or
                      at) the timestamp will be matched.

    :type filters: list
    :param filters: (Optional) List of existing filters to be extended.
                    A list passed in here is extended in place.

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The chained filter created, or just a single filter if only
              one was needed.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to chain.
    """
    if filters is None:
        filters = []

    if column is not None:
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        family_id, qualifier = column.split(':')
        filters.extend([
            FamilyNameRegexFilter(family_id),
            ColumnQualifierRegexFilter(qualifier),
        ])

    if versions is not None:
        filters.append(CellsColumnLimitFilter(versions))

    time_range = _convert_to_time_range(timestamp=timestamp)
    if time_range is not None:
        filters.append(TimestampRangeFilter(time_range))

    filter_count = len(filters)
    if filter_count == 0:
        raise ValueError('Must have at least one filter.')
    if filter_count == 1:
        # A lone filter is returned as-is rather than wrapped in a chain.
        return filters[0]
    return RowFilterChain(filters=filters)
def read_row(self, row_id, column_families=None):
    """Read a single row and return it as a dict.

    :param row_id: row key as text; it is UTF-8 encoded before the read.
    :param column_families: optional iterable of column family names to
        restrict the read to.
    :returns: the result of ``partial_row_to_dict`` for the row.
    :raises KeyError: if the low-level read returns ``None`` for the key.
    """
    cell_limit = CellsColumnLimitFilter(1)

    family_part = None
    if column_families is not None:
        per_family = [FamilyNameRegexFilter(name)
                      for name in column_families]
        if len(per_family) == 1:
            family_part = per_family[0]
        elif per_family:
            family_part = RowFilterUnion(per_family)

    # Chain the cell limit with the family restriction only when there is one.
    if family_part is None:
        filter_ = cell_limit
    else:
        filter_ = RowFilterChain(filters=[cell_limit, family_part])

    encoded_key = row_id.encode("utf-8")
    row = self._low_level.read_row(encoded_key, filter_=filter_)
    if row is None:
        raise KeyError("row {} not found".format(row_id))
    return self.partial_row_to_dict(row)
def get_bt_results(key_fields, table_config, pool):
    '''
    Fetch the first big table row matching a key prefix.

    key_fields = key fields for the search; joined with the key delimiter
                 to form the scan prefix
    table_config = the table to scan
    pool = a big table connection pool

    Only the `meta` column family is read and the scan is limited to one
    row. Returns that row, or an empty dict when the scan finds nothing.
    '''
    scan_prefix = du.BIGTABLE_KEY_DELIM.join(key_fields)
    meta_only = FamilyNameRegexFilter('meta')
    rows = bt.scan_table(table_config, pool,
                         prefix=scan_prefix,
                         limit=1,
                         filter=meta_only)
    if not rows:
        return {}
    return rows[0]
def get_list_table_results(key_fields, pool, include_data, table_config,
                           metric_name):
    '''
    Helper to query table and create results for list based results

    key_fields = array of key fields; joined with the key delimiter to form
                 the scan prefix.
    pool = connection pool.
    include_data = boolean indicating if data attributes should be
                   included in results. When false, only the `meta` column
                   family is read.
    table_config = configuration file for table to query from.
    metric_name = accepted for interface compatibility; not used here.

    Returns the scanned rows sorted with `sort_by_count`, in descending
    order.
    '''
    scan_params = {"prefix": du.BIGTABLE_KEY_DELIM.join(key_fields)}
    if not include_data:
        scan_params["filter"] = FamilyNameRegexFilter('meta')

    rows = scan_table(table_config, pool, **scan_params)
    return sorted(rows, key=sort_by_count, reverse=True)