Code Example #1
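Builds the attribute half of a Bigtable read filter. For each attribute prefix it chains a FamilyNameRegexFilter with optional column-qualifier and timestamp filters, then unions the per-attribute chains when more than one prefix is given. All snippets on this page assume from google.cloud.bigtable import row_filters (and, where a client is created, from google.cloud import bigtable); those imports are omitted in the originals.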
  def _GetAttributeFilterUnion(self, attributes, timestamp_filter=None):
    filters = []
    for attribute_prefix in attributes:
      family, column = self.GetFamilyColumn(attribute_prefix)

      family_filter = row_filters.FamilyNameRegexFilter(family)
      row_filter_list = [family_filter]

      if column:
        col_filter = row_filters.ColumnQualifierRegexFilter(column)
        row_filter_list.append(col_filter)

      if timestamp_filter:
        row_filter_list.append(timestamp_filter)

      if len(row_filter_list) > 1:
        row_filter = row_filters.RowFilterChain(filters=row_filter_list)
      else:
        row_filter = row_filter_list[0]

      filters.append(row_filter)

    # More than one attribute, use a union, otherwise just use the
    # existing filter.
    if len(filters) > 1:
      filters = row_filters.RowFilterUnion(filters=filters)
    else:
      filters = filters[0]

    return filters
Code Example #2
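Reads every row in the table, but keeps only cells whose column family matches the regex stats_.*$. Note that the pattern is passed as bytes; the print_row helper it calls is sketched after example #3.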
def filter_limit_col_family_regex(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(
        filter_=row_filters.FamilyNameRegexFilter("stats_.*$".encode("utf-8")))
    for row in rows:
        print_row(row)
Code Example #3
File: filter_snippets.py Project: othercamb/GCP
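Composes two filters with RowFilterChain, which applies its components in sequence (an AND): CellsColumnLimitFilter(1) keeps only the most recent cell per column, and FamilyNameRegexFilter("cell_plan") restricts output to the cell_plan family.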
def filter_composing_chain(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.RowFilterChain(
        filters=[row_filters.CellsColumnLimitFilter(1),
                 row_filters.FamilyNameRegexFilter("cell_plan")]))
    for row in rows:
        print_row(row)
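Examples #2 and #3 call a print_row helper defined elsewhere in filter_snippets.py and not shown on this page. A minimal sketch, assuming the PartialRowData objects that read_rows yields, might look like this:

def print_row(row):
    # Sketch only: iterate every family, column, and cell in the row.
    print("Reading data for {}:".format(row.row_key.decode("utf-8")))
    for column_family, columns in sorted(row.cells.items()):
        print("Column Family {}".format(column_family))
        for column, cells in sorted(columns.items()):
            for cell in cells:
                print("\t{}: {} @{}".format(
                    column.decode("utf-8"),
                    cell.value.decode("utf-8"),
                    cell.timestamp))
    print("")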
Code Example #4
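Fetches a single row whose key is derived from a user ID and timestamp, filtered to one column family, and returns the decoded values of a fixed column as (timestamp, value) tuples. _table, init(), _get_row_key, and _COLUMN_ID are module-level names from the surrounding project.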
def read_transaction(column_family_id, user_id, timestamp):
    global _table
    if _table is None:
        init()

    try:
        r = _table.read_row(
            _get_row_key(user_id, timestamp).encode(),
            filter_=row_filters.FamilyNameRegexFilter(column_family_id))

        if r is None:
            return None

        cells = r.cells[column_family_id][_COLUMN_ID.encode()]
        return [(timestamp, cell.value.decode()) for cell in cells]
    except Exception as e:
        print(e)
Code Example #5
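Uses a filter chain as the predicate of a conditional (check-and-mutate) write: os_name is set only when the row already has an os_build value in stats_summary beginning with PQ2A.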
def write_conditional(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    timestamp = datetime.datetime.utcnow()
    column_family_id = "stats_summary"

    row_key = "phone#4c410523#20190501"

    row_filter = row_filters.RowFilterChain(filters=[
        row_filters.FamilyNameRegexFilter(column_family_id),
        row_filters.ColumnQualifierRegexFilter("os_build"),
        row_filters.ValueRegexFilter("PQ2A\\..*"),
    ])
    row = table.conditional_row(row_key, filter_=row_filter)
    row.set_cell(column_family_id, "os_name", "android", timestamp)
    row.commit()

    print("Successfully updated row's os_name.")
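conditional_row turns the filter into a check-and-mutate predicate: set_cell queues the mutation for the matching branch (the default state=True; state=False queues it for the non-matching branch), and commit() sends a single atomic CheckAndMutateRow request. commit() returns whether the predicate matched, so the success message above prints even when no write actually happened.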
Code Example #6
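The range-scan counterpart of example #4: read_rows scans keys from the from_timestamp key (inclusive) to the to_timestamp key (exclusive by default), restricted to one column family, and accumulates (timestamp, value) tuples across all matching rows.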
def read_transactions(column_family_id, user_id, from_timestamp, to_timestamp):
    global _table
    if _table is None:
        init()

    rs = _table.read_rows(
        start_key=_get_row_key(user_id, from_timestamp).encode(),
        end_key=_get_row_key(user_id, to_timestamp).encode(),
        filter_=row_filters.FamilyNameRegexFilter(column_family_id))

    if rs is None:
        return None

    rs.consume_all()
    res = []
    for row_key, row_data in rs.rows.items():
        cells = row_data.cells[column_family_id][_COLUMN_ID.encode()]
        user_id, timestamp = _parse_row_key(row_key)
        res += [(timestamp, cell.value.decode()) for cell in cells]
    return res
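Examples #4 and #6 rely on _get_row_key and _parse_row_key helpers (and an init() that opens _table) defined elsewhere in their project; the actual key format is not shown. A hypothetical pair consistent with how the snippets use them:

def _get_row_key(user_id, timestamp):
    # Hypothetical format; the real project likely zero-pads the
    # timestamp so the start_key/end_key range scan sorts correctly.
    return "{}#{}".format(user_id, timestamp)

def _parse_row_key(row_key):
    # Inverse of _get_row_key; row keys come back from Bigtable as bytes.
    user_id, timestamp = row_key.decode().split("#", 1)
    return user_id, timestamp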
Code Example #7
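A larger example that builds the same union of per-prefix filter chains as example #1 inline, then issues one filtered read_row RPC per subject through a thread pool, as its docstring explains.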
  def MultiResolvePrefix(self,
                         subjects,
                         attribute_prefix,
                         timestamp=None,
                         limit=None,
                         token=None):
    """Get results from multiple rows matching multiple attributes.

    We could implement this using read_rows, but that would be a table scan.
    Our current data model makes scans slow because it is a directory
    hierarchy with entries for subdirectories interleaved, so getting all the
    results for one directory means skipping over those entries in the scan.

    Instead we make an RPC for each subject, all at once, using a threadpool.
    We pay more in RPC overhead, but we get to do the reads concurrently.

    Args:
      subjects: A list of subjects.
      attribute_prefix: The attribute prefix.

      timestamp: A range of times for consideration (in
          microseconds). Can be a constant such as ALL_TIMESTAMPS or
          NEWEST_TIMESTAMP, or a tuple of ints (start, end).

      limit: The total number of result values to return.
      token: An ACL token.

    Yields:
       A list of tuples:
       (subject, [(attribute, value string, timestamp)])

       that can be simply converted to a dict.

       Values with the same attribute (which happens when timestamp is
       ALL_TIMESTAMPS or a time range rather than NEWEST_TIMESTAMP) are
       guaranteed to be ordered by decreasing timestamp.

    Raises:
      AccessError: if anything goes wrong.
      ValueError: if we get a string instead of a list of subjects.
    """
    self.security_manager.CheckDataStoreAccess(
        token, subjects, self.GetRequiredResolveAccess(attribute_prefix))

    if isinstance(subjects, basestring):
      raise ValueError("Expected list of subjects, got string: %s" % subjects)

    if isinstance(attribute_prefix, basestring):
      attribute_prefix_list = [utils.SmartStr(attribute_prefix)]
    else:
      attribute_prefix_list = [utils.SmartStr(x) for x in attribute_prefix]

    timestamp_filter = self._TimestampToFilter(timestamp)
    filter_union = []

    for attribute_prefix in attribute_prefix_list:
      family, column = self.GetFamilyColumn(attribute_prefix)

      family_filter = row_filters.FamilyNameRegexFilter(family)
      row_filter_list = [family_filter]

      if column:
        # Make it an actual regex
        column += ".*"
        col_filter = row_filters.ColumnQualifierRegexFilter(column)
        row_filter_list.append(col_filter)

      if timestamp_filter:
        row_filter_list.append(timestamp_filter)

      if len(row_filter_list) > 1:
        row_filter = row_filters.RowFilterChain(filters=row_filter_list)
      else:
        row_filter = row_filter_list[0]

      filter_union.append(row_filter)

    # More than one set of prefixes, use a union, otherwise just use the
    # existing filter chain.
    if len(filter_union) > 1:
      attribute_filter = row_filters.RowFilterUnion(filters=filter_union)
    else:
      attribute_filter = filter_union[0]

    # Apply those filters to each subject as a separate RPC using a threadpool
    pool_args = []
    original_subject_map = {}
    for subject in subjects:
      # List of *args, **kwargs to pass to the RPC caller
      pool_args.append(((self.table.read_row, "read", utils.SmartStr(subject)),
                        {
                            "filter_": attribute_filter
                        }))

      # We're expected to return subjects as their original type, which can be
      # URN, unicode, or string. Keep a mapping in this dict.
      original_subject_map[utils.SmartStr(subject)] = subject

    max_results = limit or 2**64
    for result in self.pool.imap_unordered(self._WrapCallWithRetry, pool_args):
      if max_results <= 0:
        break
      if result:
        subject_results, max_results = self._GetSubjectResults(result,
                                                               max_results)
        yield original_subject_map[
            result.row_key], self._SortResultsByAttrTimestampValue(
                subject_results)