Exemples Python de RowFilterChain : gcloud.bigtable.row_filters.RowFilterChain

Exemple #1

0

Afficher le fichier

Fichier : cloud_bigtable_data_store.py Projet : ytisf/grr

    def _GetAttributeFilterUnion(self, attributes, timestamp_filter=None):
        """Build a filter that matches any of the given attributes.

        Each attribute becomes a family-name regex filter, chained with a
        column-qualifier regex filter when the attribute has a column part
        and with timestamp_filter when one is supplied.

        Args:
          attributes: Iterable of attribute strings; each one is split with
              self.GetFamilyColumn.
          timestamp_filter: Optional filter appended to every per-attribute
              chain.

        Returns:
          The single per-attribute filter when there is exactly one
          attribute, otherwise a RowFilterUnion over all of them.

        Raises:
          IndexError: if attributes is empty.
        """
        per_attribute = []
        for prefix in attributes:
            family, column = self.GetFamilyColumn(prefix)

            conditions = [row_filters.FamilyNameRegexFilter(family)]
            if column:
                conditions.append(
                    row_filters.ColumnQualifierRegexFilter(column))
            if timestamp_filter:
                conditions.append(timestamp_filter)

            # A chain is only needed when several conditions must all hold.
            if len(conditions) == 1:
                per_attribute.append(conditions[0])
            else:
                per_attribute.append(
                    row_filters.RowFilterChain(filters=conditions))

        # Several attributes are OR-ed together; a lone one is used as is.
        if len(per_attribute) > 1:
            return row_filters.RowFilterUnion(filters=per_attribute)
        return per_attribute[0]

Exemple #2

0

Afficher le fichier

Fichier : cloud_bigtable_data_store.py Projet : ytisf/grr

    def ScanAttributes(self,
                       subject_prefix,
                       attributes,
                       after_urn=None,
                       max_records=None,
                       token=None,
                       relaxed_order=False):
        """Scan rows under a subject prefix for the latest attribute values.

        Args:
          subject_prefix: Prefix of the subjects (row keys) to scan.
          attributes: Attributes to fetch; passed to
              self._GetAttributeFilterUnion.
          after_urn: If not None, the scan starts just after this row key.
          max_records: Passed to read_rows as the row limit.
          token: The security token used in this call.
          relaxed_order: Not used by this implementation.

        Returns:
          A list of (subject, results) tuples sorted by subject, where
          results is what self._ReOrderRowResults returns for that row.
        """
        subject_prefix = self._CleanSubjectPrefix(subject_prefix)
        after_urn = self._CleanAfterURN(after_urn, subject_prefix)

        # The row key filter takes a regex, so turn the prefix into one.
        # NOTE(review): the prefix is not regex-escaped here - confirm that
        # _CleanSubjectPrefix makes this safe.
        subject_prefix += ".*"
        self.security_manager.CheckDataStoreAccess(token, [subject_prefix],
                                                   "rq")

        # Subject AND (attr1 OR attr2) AND latest_value
        query_filter = row_filters.RowFilterChain([
            row_filters.RowKeyRegexFilter(utils.SmartStr(subject_prefix)),
            self._GetAttributeFilterUnion(attributes),
            row_filters.CellsColumnLimitFilter(1),
        ])

        # The API results include the start row; appending a null byte
        # excludes it from the scan.
        start_key = after_urn
        if start_key is not None:
            start_key += "\x00"

        rows_data = self.CallWithRetry(self.table.read_rows,
                                       "read",
                                       start_key=start_key,
                                       limit=max_records,
                                       filter_=query_filter)

        # Ideally we should be able to stream and yield, but it seems we can't:
        # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1812
        self.CallWithRetry(rows_data.consume_all, "read")

        rows = rows_data.rows
        if not rows:
            return []

        scanned = [(subject, self._ReOrderRowResults(row_data))
                   for subject, row_data in rows.iteritems()]
        scanned.sort(key=lambda entry: entry[0])
        return scanned

Exemple #3

0

Afficher le fichier

Fichier : cloud_bigtable_data_store.py Projet : ytisf/grr

    def Resolve(self, subject, attribute, token=None):
        """Retrieve the latest value set for a subject's attribute.

        Args:
          subject: The subject URN.
          attribute: The attribute.
          token: The security token used in this call.

        Returns:
          A (string, timestamp in microseconds) stored in the bigtable
          cell, or (None, 0).

        Raises:
          AccessError: if anything goes wrong.
        """
        subject = utils.SmartStr(subject)
        self.security_manager.CheckDataStoreAccess(
            token, [subject], self.GetRequiredResolveAccess(attribute))

        attribute = utils.SmartStr(attribute)
        family, column = self.GetFamilyColumn(attribute)

        # A range starting and ending on the same column is an exact match.
        col_filter = row_filters.ColumnRangeFilter(family,
                                                   start_column=column,
                                                   end_column=column)

        # Most recent
        latest_filter = row_filters.CellsColumnLimitFilter(1)

        row_filter = row_filters.RowFilterChain(
            filters=[col_filter, latest_filter])

        # Go through CallWithRetry like the other read paths in this data
        # store instead of issuing the RPC directly.
        row_data = self.CallWithRetry(self.table.read_row,
                                      "read",
                                      subject,
                                      filter_=row_filter)

        if row_data:
            # Return the first cell, if there is one.
            for cell in row_data.cells[family][column]:
                return self.Decode(attribute,
                                   cell.value), self.DatetimeToMicroseconds(
                                       cell.timestamp)

        return None, 0

Exemple #4

0

Afficher le fichier

Fichier : cloud_bigtable_data_store.py Projet : ytisf/grr

    def _Acquire(self, lease_time):
        """Try to take the lease on self.subject with a conditional write.

        A filter chain looks for a lease value greater than the current
        time in the lease column. The new expiry is written only when that
        filter does not match.

        Args:
          lease_time: Lease duration, added to time.time() before the sum
              is converted to microseconds.

        Raises:
          ExistingLock: if the commit reports that the filter matched.
        """
        now_us = int(time.time() * 1e6)
        expires_us = int((time.time() + lease_time) * 1e6)

        store = self.store

        # aff4:lease
        family, column = store.GetFamilyColumn(store.LEASE_ATTRIBUTE)
        lease_column = row_filters.ColumnRangeFilter(family,
                                                     start_column=column,
                                                     end_column=column)

        # Match any lease time value > now which means someone else holds a
        # lock. We can't store these as ints, encode to str.
        held_lease = row_filters.ValueRangeFilter(
            start_value=utils.SmartStr(now_us), inclusive_start=False)

        # Only latest value
        newest_only = row_filters.CellsColumnLimitFilter(1)

        # Note filter chains are evaluated in order so there are performance
        # considerations with which filter to apply first
        lock_check = row_filters.RowFilterChain(
            [lease_column, held_lease, newest_only])
        conditional_row = store.table.row(self.subject, filter_=lock_check)

        # state=False means no lease or it's expired, in this case take the
        # lock.
        conditional_row.set_cell(family,
                                 column,
                                 utils.SmartStr(expires_us),
                                 state=False)

        # Check in review: I think we want to retry the RPC here? Or should we
        # just raise like we can't get the lock?
        if store.CallWithRetry(conditional_row.commit, "write"):
            raise ExistingLock("Row %s locked." % self.subject)

        # We have the lock
        self.expires = expires_us
        self.locked = True

Exemple #5

0

Afficher le fichier

Fichier : cloud_bigtable_data_store.py Projet : ytisf/grr

    def ResolveMulti(self,
                     subject,
                     attributes,
                     timestamp=None,
                     limit=None,
                     token=None):
        """Resolve multiple attributes for a subject.

        Results will be returned in arbitrary order (i.e. not ordered by
        attribute or timestamp).

        Args:
          subject: The subject to resolve.
          attributes: The attribute string or list of strings to match. Note
              this is an exact match, not a regex.
          timestamp: A range of times for consideration (In
              microseconds). Can be a constant such as ALL_TIMESTAMPS or
              NEWEST_TIMESTAMP or a tuple of ints (start, end).
          limit: The maximum total number of results we return.
          token: The security token used in this call.

        Yields:
          Unordered (attribute, value string, timestamp) tuples.

        Raises:
          AccessError: if anything goes wrong.
        """
        subject = utils.SmartStr(subject)
        self.security_manager.CheckDataStoreAccess(
            token, [subject], self.GetRequiredResolveAccess(attributes))

        if isinstance(attributes, basestring):
            attributes = [utils.SmartStr(attributes)]
        else:
            attributes = [utils.SmartStr(x) for x in attributes]

        # Nothing to resolve: yield nothing instead of failing on an empty
        # filter list below.
        if not attributes:
            return

        column_filters = []
        for attribute in attributes:
            family, column = self.GetFamilyColumn(attribute)
            # A range starting and ending on the same column is an exact
            # match.
            column_filters.append(
                row_filters.ColumnRangeFilter(family,
                                              start_column=column,
                                              end_column=column))

        # More than one attribute, use a union, otherwise just use the
        # existing filter.
        if len(column_filters) > 1:
            attribute_filter = row_filters.RowFilterUnion(
                filters=column_filters)
        else:
            attribute_filter = column_filters[0]

        # Essentially timestamp AND (attr1 OR attr2)
        timestamp_filter = self._TimestampToFilter(timestamp)
        if timestamp_filter:
            row_filter = row_filters.RowFilterChain(
                filters=[attribute_filter, timestamp_filter])
        else:
            row_filter = attribute_filter

        row_data = self.CallWithRetry(self.table.read_row,
                                      "read",
                                      subject,
                                      filter_=row_filter)

        if row_data:
            max_results = limit or 2**64
            # Walk every family in the response. Reading only the family of
            # the last requested attribute would drop results for attributes
            # in other families, or raise KeyError when that family returned
            # no cells.
            for cell_family, columns in row_data.cells.iteritems():
                for cell_column, cells in columns.iteritems():
                    attribute = ":".join((cell_family, cell_column))
                    for cell in cells:
                        if max_results <= 0:
                            # A plain return ends the generator; raising
                            # StopIteration here turns into RuntimeError
                            # under PEP 479.
                            return
                        max_results -= 1
                        yield attribute, self.Decode(
                            attribute, cell.value), self.DatetimeToMicroseconds(
                                cell.timestamp)

Exemple #6

0

Afficher le fichier

Fichier : cloud_bigtable_data_store.py Projet : ytisf/grr

    def MultiResolvePrefix(self,
                           subjects,
                           attribute_prefix,
                           timestamp=None,
                           limit=None,
                           token=None):
        """Get results from multiple rows matching multiple attributes.

        We could implement this using read_rows, but it is a table scan. Our
        current data model makes that slow because it is a directory
        hierarchy that includes entries for subdirectories interleaved. So if
        you want all the results for a directory you need to skip those in
        the scan.

        Instead we make an RPC for each subject all at once using a
        threadpool. We pay more in RPC overhead but we get to do it
        concurrently.

        Args:
          subjects: A list of subjects.
          attribute_prefix: The attribute prefix, or a list of prefixes.
          timestamp: A range of times for consideration (In
              microseconds). Can be a constant such as ALL_TIMESTAMPS or
              NEWEST_TIMESTAMP or a tuple of ints (start, end).
          limit: The total number of result values to return.
          token: An ACL token.

        Yields:
          Tuples of (subject, [(attribute, value string, timestamp)]) that
          can be simply converted to a dict.

          Values with the same attribute (happens when timestamp is not
          NEWEST_TIMESTAMP, but ALL_TIMESTAMPS or time range) are guaranteed
          to be ordered in the decreasing timestamp order.

        Raises:
          AccessError: if anything goes wrong.
          ValueError: if we get a string instead of a list of subjects.
        """
        self.security_manager.CheckDataStoreAccess(
            token, subjects, self.GetRequiredResolveAccess(attribute_prefix))

        if isinstance(subjects, basestring):
            raise ValueError("Expected list of subjects, got string: %s" %
                             subjects)

        # Accept either a single prefix or a collection of them.
        if isinstance(attribute_prefix, basestring):
            raw_prefixes = [attribute_prefix]
        else:
            raw_prefixes = attribute_prefix
        prefixes = [utils.SmartStr(raw) for raw in raw_prefixes]

        timestamp_filter = self._TimestampToFilter(timestamp)

        per_prefix_filters = []
        for prefix in prefixes:
            family, column = self.GetFamilyColumn(prefix)

            conditions = [row_filters.FamilyNameRegexFilter(family)]
            if column:
                # Make it an actual regex
                conditions.append(
                    row_filters.ColumnQualifierRegexFilter(column + ".*"))
            if timestamp_filter:
                conditions.append(timestamp_filter)

            # A chain is only needed when several conditions must all hold.
            if len(conditions) == 1:
                per_prefix_filters.append(conditions[0])
            else:
                per_prefix_filters.append(
                    row_filters.RowFilterChain(filters=conditions))

        # More than one set of prefixes, use a union, otherwise just use the
        # existing filter chain.
        if len(per_prefix_filters) > 1:
            attribute_filter = row_filters.RowFilterUnion(
                filters=per_prefix_filters)
        else:
            attribute_filter = per_prefix_filters[0]

        # Apply those filters to each subject as a separate RPC using a
        # threadpool.
        pool_args = []
        original_subject_map = {}
        for subject in subjects:
            row_key = utils.SmartStr(subject)

            # (*args, **kwargs) pair to pass to the RPC caller
            call_args = (self.table.read_row, "read", row_key)
            call_kwargs = {"filter_": attribute_filter}
            pool_args.append((call_args, call_kwargs))

            # We're expected to return subjects as their original type, which
            # can be URN, unicode, or string. Keep a mapping in this dict.
            original_subject_map[row_key] = subject

        remaining = limit or 2**64
        for result in self.pool.imap_unordered(self._WrapCallWithRetry,
                                               pool_args):
            if remaining <= 0:
                break
            if not result:
                continue

            subject_results, remaining = self._GetSubjectResults(
                result, remaining)
            original_subject = original_subject_map[result.row_key]
            yield original_subject, self._SortResultsByAttrTimestampValue(
                subject_results)