Example #1
    def get_annotation_sv_ids(self, annotation_id, time_stamp=None):
        """ Reads the sv ids belonging to an annotation

        :param annotation_id: uint64
        :param time_stamp: None or datetime
        :return: list of np.uint64s
        """

        if time_stamp is None:
            time_stamp = datetime.datetime.utcnow()

        # Adjust time_stamp to bigtable precision
        time_stamp -= datetime.timedelta(microseconds=time_stamp.microsecond %
                                         1000)

        time_filter = TimestampRangeFilter(TimestampRange(end=time_stamp))

        row = self.table.read_row(serialize_node_id(annotation_id),
                                  filter_=time_filter)

        if row is None:
            return []

        sv_ids_bin = row.cells[self.data_family_id][serialize_key(
            "sv_ids")][0].value

        if len(sv_ids_bin) == 0:
            return None

        sv_ids = np.frombuffer(sv_ids_bin, dtype=np.uint64)

        return sv_ids
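The microsecond-trimming idiom above recurs throughout these examples (2, 7, 8 and 10): Bigtable timestamps have millisecond granularity, so sub-millisecond digits must be dropped before building a TimestampRange. A minimal sketch of that idiom factored into a helper; the name round_to_bigtable_ms is illustrative, not from the original code:

import datetime

def round_to_bigtable_ms(time_stamp):
    # Drop the sub-millisecond remainder to match Bigtable's
    # millisecond timestamp granularity.
    return time_stamp - datetime.timedelta(
        microseconds=time_stamp.microsecond % 1000)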
Example #2
    def get_annotation_data(self, annotation_id, time_stamp=None):
        """ Reads the data of a single annotation object

        :param annotation_id: uint64
        :param time_stamp: None or datetime
        :return: blob
        """

        if time_stamp is None:
            time_stamp = datetime.datetime.utcnow()

        # Adjust time_stamp to bigtable precision
        time_stamp -= datetime.timedelta(microseconds=time_stamp.microsecond %
                                         1000)

        time_filter = TimestampRangeFilter(TimestampRange(end=time_stamp))

        row = self.table.read_row(serialize_node_id(annotation_id),
                                  filter_=time_filter)

        if row is None:
            return None

        bin_data = row.cells[self.data_family_id][serialize_key(
            "data")][0].value

        if len(bin_data) == 0:
            return None

        return bin_data
Example #3
    def __init__(self, table, timestamp=None, batch_size=None,
                 transaction=False, wal=_WAL_SENTINEL):
        if wal is not _WAL_SENTINEL:
            _WARN(_WAL_WARNING)

        if batch_size is not None:
            if transaction:
                raise TypeError('When batch_size is set, a Batch cannot be '
                                'transactional')
            if batch_size <= 0:
                raise ValueError('batch_size must be positive')

        self._table = table
        self._batch_size = batch_size
        self._timestamp = self._delete_range = None

        # Timestamp is in milliseconds, convert to microseconds.
        if timestamp is not None:
            self._timestamp = _datetime_from_microseconds(1000 * timestamp)
            # For deletes, we get the very next timestamp (assuming timestamp
            # granularity is milliseconds). This is because HappyBase users
            # expect HBase deletes to go **up to** and **including** the
            # timestamp while Cloud Bigtable Time Ranges **exclude** the
            # final timestamp.
            next_timestamp = self._timestamp + _ONE_MILLISECOND
            self._delete_range = TimestampRange(end=next_timestamp)

        self._transaction = transaction

        # Internal state for tracking mutations.
        self._row_map = {}
        self._mutation_count = 0
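The comment block above hides a concrete unit conversion. The sketch below walks through it with the same _datetime_from_microseconds helper from google.cloud._helpers that the constructor uses; the timestamp value is arbitrary:

from google.cloud._helpers import _datetime_from_microseconds
from google.cloud.bigtable.row_filters import TimestampRange

hbase_ts = 144185290431  # HBase-style timestamp, in milliseconds

# HBase deletes go up to AND including the timestamp, while Bigtable's
# TimestampRange excludes its end, so the end bound is pushed one
# millisecond past the HBase timestamp.
delete_range = TimestampRange(
    end=_datetime_from_microseconds(1000 * (hbase_ts + 1)))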
Example #4
    def test_to_pb(self):
        from google.cloud.bigtable.row_filters import TimestampRange

        range_ = TimestampRange()
        row_filter = self._makeOne(range_)
        pb_val = row_filter.to_pb()
        expected_pb = _RowFilterPB(timestamp_range_filter=_TimestampRangePB())
        self.assertEqual(pb_val, expected_pb)
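For comparison, a range with bounds fills in the corresponding protobuf fields; a quick way to inspect this in a session with google-cloud-bigtable installed (the end value here is arbitrary):

import datetime
from google.cloud.bigtable.row_filters import (TimestampRange,
                                               TimestampRangeFilter)

end = datetime.datetime.utcnow().replace(microsecond=0)
# The empty TimestampRange in the test serializes to an empty
# timestamp_range message; a bounded one carries end_timestamp_micros.
print(TimestampRangeFilter(TimestampRange(end=end)).to_pb())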
Example #5
    def _get(self, uuid: UUID, time: int) -> Optional[bytes]:
        return self._table.read_row(
            row_key=uuid.bytes_le,
            filter_=RowFilterChain(filters=[
                # The range end is exclusive, so time + 1 ms includes
                # cells written at exactly `time`.
                TimestampRangeFilter(range_=TimestampRange(end=millis_dt(time + 1))),
                CellsColumnLimitFilter(num_cells=1)
            ])).cell_value(column_family_id=FAMILY, column=COLUMN)
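millis_dt is not defined in this snippet; it presumably converts epoch milliseconds to a datetime, which explains the time + 1 argument noted above. A plausible implementation under that assumption:

import datetime

def millis_dt(millis):
    # Convert epoch milliseconds to a timezone-aware UTC datetime.
    return datetime.datetime.fromtimestamp(millis / 1000.0,
                                           tz=datetime.timezone.utc)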
Example #6
    def test_delete_cells_with_time_range(self):
        import datetime
        from google.cloud._helpers import _EPOCH
        from google.cloud.bigtable.row_filters import TimestampRange

        microseconds = 30871000  # ensure the value already has millisecond granularity
        start = _EPOCH + datetime.timedelta(microseconds=microseconds)
        time_range = TimestampRange(start=start)
        self._delete_cells_helper(time_range=time_range)
Example #7
    def _lock_single_annotation(self, annotation_id, operation_id):
        """ Attempts to lock the latest version of a root node

        :param annotation_id: uint64
        :param operation_id: str
            an id that is unique to the process asking to lock the root node
        :return: bool
            success
        """

        operation_id_b = serialize_key(operation_id)

        lock_key = serialize_key("lock")

        # Build a column filter which tests if a lock was set (== lock column
        # exists) and if it is still valid (timestamp younger than
        # LOCK_EXPIRED_TIME_DELTA)

        time_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA

        # Round down to the millisecond resolution of Bigtable's TimeRange
        time_cutoff -= datetime.timedelta(
            microseconds=time_cutoff.microsecond % 1000)

        time_filter = TimestampRangeFilter(TimestampRange(start=time_cutoff))

        lock_key_filter = ColumnRangeFilter(
            column_family_id=self.data_family_id,
            start_column=lock_key,
            end_column=lock_key,
            inclusive_start=True,
            inclusive_end=True)

        # Combine filters together
        chained_filter = RowFilterChain([time_filter, lock_key_filter])

        # Get conditional row using the chained filter
        annotation_row = self.table.row(serialize_node_id(annotation_id),
                                        filter_=chained_filter)

        # Set row lock if condition returns no results (state == False)
        annotation_row.set_cell(self.data_family_id,
                                lock_key,
                                operation_id_b,
                                state=False)

        # commit() returns whether the filter matched; the lock was
        # acquired only if it did not match (state == False)
        lock_acquired = not annotation_row.commit()

        return lock_acquired
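Because the conditional commit either acquires the lock or fails atomically, callers typically retry before giving up. A hypothetical wrapper, not part of the original class (names and timings are illustrative):

import time

def lock_with_retry(store, annotation_id, operation_id,
                    n_tries=5, wait_s=0.5):
    # Retry the conditional lock a few times before giving up.
    for _ in range(n_tries):
        if store._lock_single_annotation(annotation_id, operation_id):
            return True
        time.sleep(wait_s)
    return False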
Example #8
    def _unlock_annotation(self, annotation_id, operation_id):
        """ Unlocks a root

        This is mainly used for cases where multiple roots need to be locked
        and locking was not successful for all of them

        :param annotation_id: uint64
        :param operation_id: str
            an id that is unique to the process asking to lock the root node
        :return: bool
            success
        """
        operation_id_b = serialize_key(operation_id)

        lock_key = serialize_key("lock")

        # Build a column filter which tests if a lock was set (== lock column
        # exists) and if it is still valid (timestamp younger than
        # LOCK_EXPIRED_TIME_DELTA) and if the given operation_id is still
        # the active lock holder

        time_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA

        # Round down to the millisecond resolution of Bigtable's TimeRange
        time_cutoff -= datetime.timedelta(
            microseconds=time_cutoff.microsecond % 1000)

        time_filter = TimestampRangeFilter(TimestampRange(start=time_cutoff))

        column_key_filter = ColumnQualifierRegexFilter(lock_key)

        # Match the cell value to test whether operation_id is still the
        # active lock holder
        value_filter = ValueRegexFilter(operation_id_b)

        # Chain these filters together
        chained_filter = RowFilterChain(
            [time_filter, column_key_filter, value_filter])

        # Get conditional row using the chained filter
        root_row = self.table.row(serialize_node_id(annotation_id),
                                  filter_=chained_filter)

        # Delete row if conditions are met (state == True)
        root_row.delete_cell(self.data_family_id, lock_key, state=True)

        # commit() reports whether the filter matched, i.e. whether the
        # lock was held by operation_id and has now been released
        return root_row.commit()
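The multi-root scenario the docstring mentions might look like the following rollback pattern; this is a sketch of the intended usage, not code from the original module:

def lock_all_or_release(store, annotation_ids, operation_id):
    # Acquire locks one by one; on the first failure, release every
    # lock acquired so far and report failure.
    locked = []
    for annotation_id in annotation_ids:
        if not store._lock_single_annotation(annotation_id, operation_id):
            for locked_id in locked:
                store._unlock_annotation(locked_id, operation_id)
            return False
        locked.append(annotation_id)
    return True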
Example #9
def get_time_range_filter(start_time: Optional[datetime.datetime] = None,
                          end_time: Optional[datetime.datetime] = None,
                          end_inclusive: bool = True) -> RowFilter:
    """ Generates a TimeStampRangeFilter which is inclusive for start and (optionally) end.

    :param start:
    :param end:
    :return:
    """
    # Comply to resolution of BigTables TimeRange
    if start_time is not None:
        start_time = get_google_compatible_time_stamp(start_time,
                                                      round_up=False)
    if end_time is not None:
        end_time = get_google_compatible_time_stamp(end_time,
                                                    round_up=end_inclusive)

    return TimestampRangeFilter(TimestampRange(start=start_time, end=end_time))
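get_google_compatible_time_stamp is not shown in these examples. Given the rounding idiom used in Examples 1, 7 and 8, it presumably trims timestamps to millisecond granularity, rounding up when the value will serve as an exclusive end bound. A sketch under that assumption:

import datetime

def get_google_compatible_time_stamp(time_stamp, round_up=False):
    # Bigtable timestamps have millisecond granularity: drop the
    # sub-millisecond remainder, or round up to the next millisecond
    # when the caller needs an exclusive end bound to behave inclusively.
    rest = datetime.timedelta(microseconds=time_stamp.microsecond % 1000)
    if rest and round_up:
        return time_stamp + (datetime.timedelta(milliseconds=1) - rest)
    return time_stamp - rest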
Example #10
    def get_annotation_ids_from_sv(self, sv_id, time_stamp=None):
        """ Acquires all annotation ids associated with a supervoxel

        To also read the data of the acquired annotations use
        `get_annotations_from_sv`

        :param sv_id: uint64
        :param time_stamp: None or datetime
        :return: list
            annotation ids
        """

        if time_stamp is None:
            time_stamp = datetime.datetime.utcnow()

        # Adjust time_stamp to bigtable precision
        time_stamp -= datetime.timedelta(microseconds=time_stamp.microsecond %
                                         1000)

        time_filter = TimestampRangeFilter(TimestampRange(end=time_stamp))

        # Read mapped entries with time_stamp
        row = self.table.read_row(serialize_node_id(sv_id),
                                  filter_=time_filter)

        if row is None:
            return []

        anno_id_entries = row.cells[self.mapping_family_id][serialize_key(
            "mapped_anno_ids")]
        anno_ids = []
        for entry in anno_id_entries:
            anno_ids.extend(np.frombuffer(entry.value, dtype=np.uint64))

        # Resolve changes over time
        anno_ids, c_anno_ids = np.unique(anno_ids, return_counts=True)

        # Every anno_id with number of entries % 2 == 0 was removed
        anno_ids = anno_ids[c_anno_ids % 2 == 1]

        return anno_ids
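The parity trick above treats the cell history as an append-only add/remove log: every mutation appends ids, so an id that was added and later removed appears an even number of times. A minimal demonstration:

import numpy as np

history = np.array([1, 2, 2, 3, 3, 3], dtype=np.uint64)
ids, counts = np.unique(history, return_counts=True)
# id 2 was added and removed again (even count); 1 and 3 are present.
print(ids[counts % 2 == 1])  # -> [1 3]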
Example #11
def _convert_to_time_range(timestamp=None):
    """Create a timestamp range from an HBase / HappyBase timestamp.

    HBase uses timestamp as an argument to specify an exclusive end
    deadline. Cloud Bigtable also uses exclusive end times, so
    the behavior matches.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). Intended to be used as the end of an HBase
                      time range, which is exclusive.

    :rtype: :class:`~google.cloud.bigtable.row.TimestampRange`,
            :data:`NoneType <types.NoneType>`
    :returns: The timestamp range corresponding to the passed in
              ``timestamp``.
    """
    if timestamp is None:
        return None

    next_timestamp = _datetime_from_microseconds(1000 * timestamp)
    return TimestampRange(end=next_timestamp)
Example #12
    def test_constructor_explicit(self):
        from google.cloud._helpers import _datetime_from_microseconds
        from google.cloud.bigtable.row_filters import TimestampRange

        table = object()
        timestamp = 144185290431
        batch_size = 42
        transaction = False  # Must be False when batch_size is non-null

        batch = self._make_one(
            table, timestamp=timestamp,
            batch_size=batch_size, transaction=transaction)
        self.assertEqual(batch._table, table)
        self.assertEqual(batch._batch_size, batch_size)
        self.assertEqual(batch._timestamp,
                         _datetime_from_microseconds(1000 * timestamp))

        next_timestamp = _datetime_from_microseconds(1000 * (timestamp + 1))
        time_range = TimestampRange(end=next_timestamp)
        self.assertEqual(batch._delete_range, time_range)
        self.assertEqual(batch._transaction, transaction)
        self.assertEqual(batch._row_map, {})
        self.assertEqual(batch._mutation_count, 0)
Example #13
import datetime
import json
import os
from pprint import pprint

from google.cloud import bigtable
from google.cloud.bigtable.row_filters import TimestampRange, TimestampRangeFilter

project_id = os.environ.get("PROJECT_ID", "simula-cov19")
instance_id = os.environ.get("BT_INSTANCE_ID", "test-bigt")
table_id = 'test'

now = datetime.datetime.now(datetime.timezone.utc)
# get events reported via the API over the last five days (API report time, not event timestamp)
start_time = now - datetime.timedelta(days=5)
end_time = now
ts_range = TimestampRange(start=start_time, end=end_time)
row_filter = TimestampRangeFilter(ts_range)

# connect to Bigtable
client = bigtable.Client(project=project_id)
instance = client.instance(instance_id)
table = instance.table(table_id)

# make our query

for row in table.read_rows(filter_=row_filter):
    print(row.row_key)
    for cell in row.to_dict().get(b'events:event', []):
        data = json.loads(cell.value.decode('utf8'))
        pprint(data, indent=2)