Example #1
  def _GetAttributeFilterUnion(self, attributes, timestamp_filter=None):
    """Builds a single Bigtable row filter matching any of the attributes."""
    filters = []
    for attribute_prefix in attributes:
      family, column = self.GetFamilyColumn(attribute_prefix)

      family_filter = row_filters.FamilyNameRegexFilter(family)
      row_filter_list = [family_filter]

      if column:
        col_filter = row_filters.ColumnQualifierRegexFilter(column)
        row_filter_list.append(col_filter)

      if timestamp_filter:
        row_filter_list.append(timestamp_filter)

      if len(row_filter_list) > 1:
        row_filter = row_filters.RowFilterChain(filters=row_filter_list)
      else:
        row_filter = row_filter_list[0]

      filters.append(row_filter)

    # More than one attribute: use a union; otherwise just return the single
    # existing filter.
    if len(filters) > 1:
      return row_filters.RowFilterUnion(filters=filters)
    return filters[0]
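A minimal sketch of how this helper might be invoked follows; the datastore object ds, the attribute names, and the time window are hypothetical, not part of the original code:

import datetime

from google.cloud.bigtable import row_filters

# Hypothetical usage: match either attribute, restricted to the last hour.
end = datetime.datetime.utcnow()
start = end - datetime.timedelta(hours=1)
timestamp_filter = row_filters.TimestampRangeFilter(
    row_filters.TimestampRange(start=start, end=end))
filter_ = ds._GetAttributeFilterUnion(  # ds: hypothetical datastore instance
    ["metadata:last", "aff4:type"], timestamp_filter=timestamp_filter)
rows = ds.table.read_rows(filter_=filter_)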
Example #2
def filter_limit_col_qualifier_regex(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    # Column qualifiers are raw bytes, so the RE2 pattern is passed as bytes.
    rows = table.read_rows(filter_=row_filters.ColumnQualifierRegexFilter(
        "connected_.*$".encode("utf-8")))
    for row in rows:
        print_row(row)
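These snippets assume the usual sample imports (from google.cloud import bigtable, and from google.cloud.bigtable import row_filters). The print_row helper they call is not shown here; a sketch consistent with the official Cloud Bigtable samples looks like this:

def print_row(row):
    print("Reading data for {}:".format(row.row_key.decode("utf-8")))
    for cf, cols in sorted(row.cells.items()):
        print("Column Family {}".format(cf))
        for col, cells in sorted(cols.items()):
            for cell in cells:
                labels = (" [{}]".format(",".join(cell.labels))
                          if cell.labels else "")
                print("\t{}: {} @{}{}".format(
                    col.decode("utf-8"), cell.value.decode("utf-8"),
                    cell.timestamp, labels))
    print("")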
Example #3
def read_row_partial(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    row_key = "phone#4c410523#20190501"
    col_filter = row_filters.ColumnQualifierRegexFilter(b"os_build")

    row = table.read_row(row_key, filter_=col_filter)
    print_row(row)
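Note that read_row fetches a single row by key rather than scanning, and the filter is applied server side, so only the os_build cells of that row come back.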
Example #4
def filter_composing_interleave(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.RowFilterUnion(
        filters=[row_filters.ValueRegexFilter("true"),
                 row_filters.ColumnQualifierRegexFilter("os_build")]))
    for row in rows:
        print_row(row)
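RowFilterUnion is Bigtable's interleave filter: it emits every cell that matches any of its component filters, so a cell that matches more than one branch can appear more than once in the output.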
Example #5
    def get(self):
        bt_array = []
        try:
            table = instance.table(bt_table_name)
            row_set = RowSet()

            for row_key in row_keys:
                row_set.add_row_key(row_key)

            # One qualifier filter per mapped Bigtable column name.
            col_filters = []
            for bt_name in bt_mapping_dict.values():
                col_filters.append(
                    row_filters.ColumnQualifierRegexFilter(bt_name))

            print("before read_rows...")
            rows = table.read_rows(
                row_set=row_set,
                filter_=row_filters.RowFilterChain(filters=[
                    row_filters.CellsColumnLimitFilter(1),
                    row_filters.RowFilterUnion(filters=col_filters)
                ]),
                retry=bigtable.table.DEFAULT_RETRY_READ_ROWS.with_deadline(
                    60.0))
            print("after read_rows...")

            for row in rows:
                print("Reading data for {}:".format(
                    row.row_key.decode('utf-8')))
                for cf, cols in sorted(row.cells.items()):
                    bt_dict = {}
                    bt_dict['id'] = row.row_key.decode('utf-8')
                    # Use the BT mapping to translate Bigtable column names
                    # back into response field names.
                    for col, cells in sorted(cols.items()):
                        # Reset key per column so a column that matches no
                        # mapping entry does not reuse the previous key.
                        key = None
                        for cell in cells:
                            for name, bt_name in bt_mapping_dict.items():
                                if col.decode('utf-8') == bt_name:
                                    key = name
                                    break
                            if key is not None:
                                bt_dict[key] = cell.value.decode('utf-8')
                    bt_array.append(bt_dict)
        except Exception as error:
            logging.error(
                'An exception occurred - DemoBigTableGet::get(): {}'.format(
                    error))

        print(bt_array)

        return json.dumps(bt_array), 200, {'ContentType': 'application/json'}
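Two details worth noting here: the RowFilterChain applies CellsColumnLimitFilter(1) first, so only the most recent cell per column reaches the qualifier union, and with_deadline(60.0) returns a copy of the default read-rows retry policy whose total retry window is capped at 60 seconds.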
Example #6
def filter_composing_condition(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    rows = table.read_rows(filter_=row_filters.ConditionalRowFilter(
        base_filter=row_filters.RowFilterChain(filters=[
            row_filters.ColumnQualifierRegexFilter("data_plan_10gb"),
            row_filters.ValueRegexFilter("true")
        ]),
        true_filter=row_filters.ApplyLabelFilter(label="passed-filter"),
        false_filter=row_filters.ApplyLabelFilter(label="filtered-out")))
    for row in rows:
        print_row(row)
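ConditionalRowFilter evaluates base_filter against each row: rows where it matches at least one cell are passed through true_filter, the rest through false_filter. ApplyLabelFilter attaches its label to every cell it emits (visible as cell.labels), so the output records which branch each row took.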
Example #7
def write_conditional(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    timestamp = datetime.datetime.utcnow()
    column_family_id = "stats_summary"

    row_key = "phone#4c410523#20190501"

    row_filter = row_filters.RowFilterChain(filters=[
        row_filters.FamilyNameRegexFilter(column_family_id),
        row_filters.ColumnQualifierRegexFilter("os_build"),
        row_filters.ValueRegexFilter("PQ2A\\..*"),
    ])
    row = table.conditional_row(row_key, filter_=row_filter)
    row.set_cell(column_family_id, "os_name", "android", timestamp)
    row.commit()

    print("Successfully updated row's os_name.")
Example #8
  def MultiResolvePrefix(self,
                         subjects,
                         attribute_prefix,
                         timestamp=None,
                         limit=None,
                         token=None):
    """Get results from multiple rows matching multiple attributes.

    We could implement this using read_rows, but that is a table scan. Our
    current data model makes scans slow because the directory hierarchy
    interleaves entries for subdirectories, so fetching all the results for
    one directory means skipping over those entries during the scan.

    Instead we issue one RPC per subject, all at once, using a threadpool.
    We pay more in RPC overhead, but the calls run concurrently.

    Args:
      subjects: A list of subjects.
      attribute_prefix: The attribute prefix.

      timestamp: A range of times for consideration (In
          microseconds). Can be a constant such as ALL_TIMESTAMPS or
          NEWEST_TIMESTAMP or a tuple of ints (start, end).

      limit: The total number of result values to return.
      token: An ACL token.

    Yields:
       A list of tuples:
       (subject, [(attribute, value string, timestamp)])

       that can be simply converted to a dict.

       Values with the same attribute (which happens when timestamp is
       ALL_TIMESTAMPS or a time range rather than NEWEST_TIMESTAMP) are
       guaranteed to be ordered by decreasing timestamp.

    Raises:
      AccessError: if anything goes wrong.
      ValueError: if we get a string instead of a list of subjects.
    """
    self.security_manager.CheckDataStoreAccess(
        token, subjects, self.GetRequiredResolveAccess(attribute_prefix))

    if isinstance(subjects, basestring):
      raise ValueError("Expected list of subjects, got string: %s" % subjects)

    if isinstance(attribute_prefix, basestring):
      attribute_prefix_list = [utils.SmartStr(attribute_prefix)]
    else:
      attribute_prefix_list = [utils.SmartStr(x) for x in attribute_prefix]

    timestamp_filter = self._TimestampToFilter(timestamp)
    filter_union = []

    for attribute_prefix in attribute_prefix_list:
      family, column = self.GetFamilyColumn(attribute_prefix)

      family_filter = row_filters.FamilyNameRegexFilter(family)
      row_filter_list = [family_filter]

      if column:
        # Turn the prefix into a real regex by allowing any suffix.
        column += ".*"
        col_filter = row_filters.ColumnQualifierRegexFilter(column)
        row_filter_list.append(col_filter)

      if timestamp_filter:
        row_filter_list.append(timestamp_filter)

      if len(row_filter_list) > 1:
        row_filter = row_filters.RowFilterChain(filters=row_filter_list)
      else:
        row_filter = row_filter_list[0]

      filter_union.append(row_filter)

    # More than one set of prefixes, use a union, otherwise just use the
    # existing filter chain.
    if len(filter_union) > 1:
      attribute_filter = row_filters.RowFilterUnion(filters=filter_union)
    else:
      attribute_filter = filter_union[0]

    # Apply those filters to each subject as a separate RPC using a threadpool
    pool_args = []
    original_subject_map = {}
    for subject in subjects:
      # List of *args, **kwargs to pass to the RPC caller
      pool_args.append(((self.table.read_row, "read", utils.SmartStr(subject)),
                        {
                            "filter_": attribute_filter
                        }))

      # We're expected to return subjects as their original type, which can be
      # URN, unicode, or string. Keep a mapping in this dict.
      original_subject_map[utils.SmartStr(subject)] = subject

    max_results = limit or 2**64
    for result in self.pool.imap_unordered(self._WrapCallWithRetry, pool_args):
      if max_results <= 0:
        break
      if result:
        subject_results, max_results = self._GetSubjectResults(result,
                                                               max_results)
        yield original_subject_map[
            result.row_key], self._SortResultsByAttrTimestampValue(
                subject_results)
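A minimal sketch of a call site; the datastore object ds, the subjects, the prefix, and token are hypothetical:

# Hypothetical: resolve all metadata attributes for two client subjects.
for subject, values in ds.MultiResolvePrefix(
    ["aff4:/C.1000000000000001", "aff4:/C.1000000000000002"],
    "metadata:",
    token=token):
  print(subject, dict(values))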