Example #1
    def get_annotation_sv_ids(self, annotation_id, time_stamp=None):
        """ Reads the sv ids belonging to an annotation

        :param annotation_id: uint64
        :param time_stamp: None or datetime
        :return: np.ndarray of np.uint64 ([] if the row is missing, None if no ids are stored)
        """

        if time_stamp is None:
            time_stamp = datetime.datetime.utcnow()

        # Adjust time_stamp to bigtable precision
        time_stamp -= datetime.timedelta(microseconds=time_stamp.microsecond %
                                         1000)

        time_filter = TimestampRangeFilter(TimestampRange(end=time_stamp))

        row = self.table.read_row(serialize_node_id(annotation_id),
                                  filter_=time_filter)

        if row is None:
            return []

        # for entry in row.cells[self.data_family_id][serialize_key("sv_ids")]:
        #     print(entry.timestamp)

        sv_ids_bin = row.cells[self.data_family_id][serialize_key(
            "sv_ids")][0].value

        if len(sv_ids_bin) == 0:
            return None

        sv_ids = np.frombuffer(sv_ids_bin, dtype=np.uint64)

        return sv_ids
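The `serialize_node_id` and `serialize_key` helpers used throughout these examples are not shown. A minimal sketch of what they presumably do, purely as an assumption (encode ids and key strings into the bytes keys Bigtable expects):

import numpy as np

def serialize_key(key: str) -> bytes:
    # Assumed helper: column qualifiers and values must be bytes
    return key.encode("utf-8")

def serialize_node_id(node_id) -> bytes:
    # Assumed helper: zero-padding keeps numeric row keys lexicographically sortable
    return str(np.uint64(node_id)).zfill(20).encode("utf-8")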
Example #2
    def get_annotation_data(self, annotation_id, time_stamp=None):
        """ Reads the data of a single annotation object

        :param annotation_id: uint64
        :param time_stamp: None or datetime
        :return: blob
        """

        if time_stamp is None:
            time_stamp = datetime.datetime.utcnow()

        # Adjust time_stamp to bigtable precision
        time_stamp -= datetime.timedelta(microseconds=time_stamp.microsecond %
                                         1000)

        time_filter = TimestampRangeFilter(TimestampRange(end=time_stamp))

        row = self.table.read_row(serialize_node_id(annotation_id),
                                  filter_=time_filter)

        if row is None:
            return None

        bin_data = row.cells[self.data_family_id][serialize_key(
            "data")][0].value

        if len(bin_data) == 0:
            return None

        return bin_data
Example #3
    def _get(self, uuid: UUID, time: int) -> Optional[bytes]:
        # Newest cell at or before `time` (the TimestampRange end bound is
        # exclusive, hence time + 1)
        return self._table.read_row(
            row_key=uuid.bytes_le,
            filter_=RowFilterChain(filters=[
                TimestampRangeFilter(range_=TimestampRange(end=millis_dt(time + 1))),
                CellsColumnLimitFilter(num_cells=1)
            ])).cell_value(column_family_id=FAMILY, column=COLUMN)
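`millis_dt` is an external helper not shown here; a minimal sketch, assuming it converts an integer epoch timestamp in milliseconds into the datetime that `TimestampRange` expects (the `time + 1` above then makes the exclusive end bound cover `time` itself):

import datetime

def millis_dt(millis: int) -> datetime.datetime:
    # Assumed helper: epoch milliseconds -> UTC datetime (Bigtable timestamps
    # have millisecond resolution)
    return datetime.datetime.fromtimestamp(millis / 1000.0, tz=datetime.timezone.utc)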
Example #4
def _filter_chain_helper(column=None,
                         versions=None,
                         timestamp=None,
                         filters=None):
    """Create filter chain to limit a results set.

    :type column: str
    :param column: (Optional) The column (``fam:col``) to be selected
                   with the filter.

    :type versions: int
    :param versions: (Optional) The maximum number of cells to return.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If specified, only cells returned before (or
                      at) the timestamp will be matched.

    :type filters: list
    :param filters: (Optional) List of existing filters to be extended.

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The chained filter created, or just a single filter if only
              one was needed.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to chain.
    """
    if filters is None:
        filters = []

    if column is not None:
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        column_family_id, column_qualifier = column.split(':')
        fam_filter = FamilyNameRegexFilter(column_family_id)
        qual_filter = ColumnQualifierRegexFilter(column_qualifier)
        filters.extend([fam_filter, qual_filter])
    if versions is not None:
        filters.append(CellsColumnLimitFilter(versions))
    time_range = _convert_to_time_range(timestamp=timestamp)
    if time_range is not None:
        filters.append(TimestampRangeFilter(time_range))

    num_filters = len(filters)
    if num_filters == 0:
        raise ValueError('Must have at least one filter.')
    elif num_filters == 1:
        return filters[0]
    else:
        return RowFilterChain(filters=filters)
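Purely as illustration, a hedged usage sketch of `_filter_chain_helper`; the column name, version count, timestamp, and `table` object below are made up:

# Keep at most 3 cells of cf1:col1 written at or before the given
# epoch-millisecond timestamp
row_filter = _filter_chain_helper(column='cf1:col1',
                                  versions=3,
                                  timestamp=1650000000000)

partial_row = table.read_row(b'some-row-key', filter_=row_filter)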
Example #5
    def _lock_single_annotation(self, annotation_id, operation_id):
        """ Attempts to lock the latest version of a root node

        :param annotation_id: uint64
        :param operation_id: str
            an id that is unique to the process asking to lock the annotation
        :return: bool
            success
        """

        operation_id_b = serialize_key(operation_id)

        lock_key = serialize_key("lock")

        # Build a column filter which tests if a lock was set (== lock column
        # exists) and if it is still valid (timestamp younger than
        # LOCK_EXPIRED_TIME_DELTA)

        time_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA

        # Comply with the resolution of Bigtable's TimestampRange
        time_cutoff -= datetime.timedelta(
            microseconds=time_cutoff.microsecond % 1000)

        time_filter = TimestampRangeFilter(TimestampRange(start=time_cutoff))

        lock_key_filter = ColumnRangeFilter(
            column_family_id=self.data_family_id,
            start_column=lock_key,
            end_column=lock_key,
            inclusive_start=True,
            inclusive_end=True)

        # Combine filters together
        chained_filter = RowFilterChain([time_filter, lock_key_filter])

        # Get conditional row using the chained filter
        annotation_row = self.table.row(serialize_node_id(annotation_id),
                                        filter_=chained_filter)

        # Set row lock if condition returns no results (state == False)
        annotation_row.set_cell(self.data_family_id,
                                lock_key,
                                operation_id_b,
                                state=False)

        # The lock was acquired if commit() returns False, i.e. the filter
        # matched nothing and the conditional set_cell was applied
        lock_acquired = not annotation_row.commit()

        return lock_acquired
Example #6
    def _unlock_annotation(self, annotation_id, operation_id):
        """ Unlocks a root

        This is mainly used for cases where multiple roots need to be locked and
        locking was not sucessful for all of them

        :param annotation_id: uint64
        :param operation_id: str
            an id that is unique to the process asking to lock the annotation
        :return: bool
            success
        """
        operation_id_b = serialize_key(operation_id)

        lock_key = serialize_key("lock")

        # Build a column filter which tests if a lock was set (== lock column
        # exists) and if it is still valid (timestamp younger than
        # LOCK_EXPIRED_TIME_DELTA) and if the given operation_id is still
        # the active lock holder

        time_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA

        # Comply with the resolution of Bigtable's TimestampRange
        time_cutoff -= datetime.timedelta(
            microseconds=time_cutoff.microsecond % 1000)

        time_filter = TimestampRangeFilter(TimestampRange(start=time_cutoff))

        column_key_filter = ColumnQualifierRegexFilter(lock_key)

        # Check that the stored lock value matches this operation_id
        value_filter = ValueRegexFilter(operation_id_b)

        # Chain these filters together
        chained_filter = RowFilterChain(
            [time_filter, column_key_filter, value_filter])

        # Get conditional row using the chained filter
        root_row = self.table.row(serialize_node_id(annotation_id),
                                  filter_=chained_filter)

        # Delete the lock cell if the conditions are met (state == True)
        root_row.delete_cell(self.data_family_id, lock_key, state=True)

        return root_row.commit()
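A minimal usage sketch tying the lock and unlock helpers together, assuming `store` is an instance of the class these methods belong to; the ids below are made up:

import numpy as np

annotation_id = np.uint64(42)   # made-up annotation id
operation_id = "edit-0001"      # made-up operation id

if store._lock_single_annotation(annotation_id, operation_id):
    try:
        pass  # apply the edit while holding the lock
    finally:
        # Release the lock even if the edit fails; the unlock filter only
        # matches while this operation_id still holds a non-expired lock
        store._unlock_annotation(annotation_id, operation_id)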
Example #7
def get_time_range_filter(start_time: Optional[datetime.datetime] = None,
                          end_time: Optional[datetime.datetime] = None,
                          end_inclusive: bool = True) -> RowFilter:
    """ Generates a TimeStampRangeFilter which is inclusive for start and (optionally) end.

    :param start:
    :param end:
    :return:
    """
    # Comply with the resolution of Bigtable's TimestampRange
    if start_time is not None:
        start_time = get_google_compatible_time_stamp(start_time,
                                                      round_up=False)
    if end_time is not None:
        end_time = get_google_compatible_time_stamp(end_time,
                                                    round_up=end_inclusive)

    return TimestampRangeFilter(TimestampRange(start=start_time, end=end_time))
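`get_google_compatible_time_stamp` is not shown here; a minimal sketch, assuming it performs the same millisecond rounding as the inline timedelta adjustments in the other examples, with `round_up` controlling the rounding direction:

import datetime

def get_google_compatible_time_stamp(time_stamp: datetime.datetime,
                                      round_up: bool = False) -> datetime.datetime:
    # Bigtable timestamps have millisecond resolution; strip the sub-millisecond part
    micro_s_gap = datetime.timedelta(microseconds=time_stamp.microsecond % 1000)
    if micro_s_gap == datetime.timedelta(0):
        return time_stamp
    if round_up:
        # Round up so an inclusive end bound still covers the original instant
        return time_stamp + (datetime.timedelta(milliseconds=1) - micro_s_gap)
    return time_stamp - micro_s_gap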
Example #8
    def get_annotation_ids_from_sv(self, sv_id, time_stamp=None):
        """ Acquires all annotation ids associated with a supervoxel

        To also read the data of the acquired annotations use
        `get_annotations_from_sv`

        :param sv_id: uint64
        :param time_stamp: None or datetime
        :return: list
            annotation ids
        """

        if time_stamp is None:
            time_stamp = datetime.datetime.utcnow()

        # Adjust time_stamp to bigtable precision
        time_stamp -= datetime.timedelta(microseconds=time_stamp.microsecond %
                                         1000)

        time_filter = TimestampRangeFilter(TimestampRange(end=time_stamp))

        # Read mapped entries with time_stamp
        row = self.table.read_row(serialize_node_id(sv_id),
                                  filter_=time_filter)

        if row is None:
            return []

        anno_id_entries = row.cells[self.mapping_family_id][serialize_key(
            "mapped_anno_ids")]
        anno_ids = []
        for entry in anno_id_entries:
            # print(len(np.frombuffer(entry.value, dtype=np.uint64)))
            anno_ids.extend(np.frombuffer(entry.value, dtype=np.uint64))

        # Resolve changes over time
        anno_ids, c_anno_ids = np.unique(anno_ids, return_counts=True)

        # Every anno_id with number of entries % 2 == 0 was removed
        anno_ids = anno_ids[c_anno_ids % 2 == 1]

        return anno_ids
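The parity check at the end resolves the mapping history: every add or remove appends the annotation id again, so an id that occurs an even number of times was added and later removed. A small worked example with made-up ids:

import numpy as np

# id 7: added, removed, re-added (3 entries) -> still active
# id 9: added, removed (2 entries) -> filtered out
history = np.array([7, 9, 7, 9, 7], dtype=np.uint64)

ids, counts = np.unique(history, return_counts=True)
print(ids[counts % 2 == 1])  # -> [7]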
Example #9
import datetime
import json
import os
from pprint import pprint

from google.cloud import bigtable
from google.cloud.bigtable.row_filters import TimestampRange, TimestampRangeFilter

project_id = os.environ.get("PROJECT_ID", "simula-cov19")
instance_id = os.environ.get("BT_INSTANCE_ID", "test-bigt")
table_id = 'test'

now = datetime.datetime.now(datetime.timezone.utc)
# get events reported via the api in the last five days (api-reported time, not event timestamp)
start_time = now - datetime.timedelta(days=5)
end_time = now
ts_range = TimestampRange(start=start_time, end=end_time)
row_filter = TimestampRangeFilter(ts_range)

# connect to Bigtable
client = bigtable.Client(project=project_id)
instance = client.instance(instance_id)
table = instance.table(table_id)

# make our query

for row in table.read_rows(filter_=row_filter):
    print(row.row_key)
    for cell in row.to_dict().get(b'events:event', []):
        data = json.loads(cell.value.decode('utf8'))
        pprint(data, indent=2)