def test_read_with_label_applied(data_table, rows_to_delete, skip_on_emulator):
    """Read a row through a filter union that tags each column's cells with its own label."""
    from google.cloud.bigtable.row_filters import ApplyLabelFilter
    from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter
    from google.cloud.bigtable.row_filters import RowFilterChain
    from google.cloud.bigtable.row_filters import RowFilterUnion

    row = data_table.direct_row(ROW_KEY)
    rows_to_delete.append(row)
    cell1, _, cell3, _ = _write_to_row(row, None, row, None)
    row.commit()

    label1 = "label-red"
    label2 = "label-blue"

    # Each chain selects one column, then attaches that column's label.
    chain1 = RowFilterChain(
        filters=[ColumnQualifierRegexFilter(COL_NAME1), ApplyLabelFilter(label1)])
    chain2 = RowFilterChain(
        filters=[ColumnQualifierRegexFilter(COL_NAME2), ApplyLabelFilter(label2)])
    # The union returns both labeled columns in a single read.
    row_filter = RowFilterUnion(filters=[chain1, chain2])

    partial_row_data = data_table.read_row(ROW_KEY, filter_=row_filter)
    assert partial_row_data.row_key == ROW_KEY

    cells_returned = partial_row_data.cells
    col_fam1 = cells_returned.pop(COLUMN_FAMILY_ID1)
    # COLUMN_FAMILY_ID1 must have been the only family returned.
    assert not cells_returned

    (cell1_new,) = col_fam1.pop(COL_NAME1)
    (cell3_new,) = col_fam1.pop(COL_NAME2)
    # COL_NAME1 and COL_NAME2 must have been the only columns returned.
    assert not col_fam1

    for original, labeled, label in ((cell1, cell1_new, label1),
                                     (cell3, cell3_new, label2)):
        # Value and timestamp are preserved; the filter only adds a label.
        assert labeled.value == original.value
        assert labeled.timestamp == original.timestamp
        assert original.labels == []
        assert labeled.labels == [label]
def _columns_filter_helper(columns):
    """Creates a union filter for a list of columns.

    :type columns: list
    :param columns: Iterable containing column names (as strings). Each column
                    name can be either

                      * an entire column family: ``fam`` or ``fam:``
                      * a single column: ``fam:col``

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The union filter created containing all of the matched columns.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    filters = []
    for family_id, qualifier in _get_column_pairs(columns):
        family_filter = FamilyNameRegexFilter(family_id)
        if qualifier is None:
            # A bare family name matches every column in that family.
            filters.append(family_filter)
        else:
            # ``fam:col`` needs both the family and the qualifier to match.
            qualifier_filter = ColumnQualifierRegexFilter(qualifier)
            filters.append(
                RowFilterChain(filters=[family_filter, qualifier_filter]))

    if not filters:
        raise ValueError('Must have at least one filter.')
    if len(filters) == 1:
        return filters[0]
    return RowFilterUnion(filters=filters)
def read_rows(self, table_name, column_family_name, column_name,
              start_key=None, end_key=None, end_inclusive=True):
    """Reads cells of one column from bigtable.

    Note that it returns the latest version of the cell. It is only able
    to read data from one column.

    Args:
        table_name (str):
        column_family_name (str):
        column_name (bytes):
        start_key (str):
        end_key (str):
        end_inclusive (bool):

    Returns:
        list[tuple[str, str]]
    """
    # Restrict the scan to the single requested column qualifier.
    row_filter = ColumnQualifierRegexFilter(regex=column_name)
    table = self.instance.table(table_name)
    rows = table.read_rows(start_key, end_key, filter_=row_filter,
                           end_inclusive=end_inclusive)

    result = []
    for row in rows:
        key = row.row_key.decode('utf-8')
        value = row.cell_value(column_family_name, column_name).decode('utf-8')
        result.append((key, value))
    return result
def test_read_with_label_applied(self):
    """Verify that ApplyLabelFilter attaches a distinct label per column on read."""
    self._maybe_emulator_skip("Labels not supported by Bigtable emulator")
    row = self._table.row(ROW_KEY)
    self.rows_to_delete.append(row)
    cell1, _, cell3, _ = self._write_to_row(row, None, row)
    row.commit()

    label1 = u"label-red"
    label2 = u"label-blue"

    # One chain per column: select the column, then attach its label.
    chain1 = RowFilterChain(
        filters=[ColumnQualifierRegexFilter(COL_NAME1),
                 ApplyLabelFilter(label1)])
    chain2 = RowFilterChain(
        filters=[ColumnQualifierRegexFilter(COL_NAME2),
                 ApplyLabelFilter(label2)])
    # The union brings both labeled columns together in one read.
    row_filter = RowFilterUnion(filters=[chain1, chain2])

    partial_row_data = self._table.read_row(ROW_KEY, filter_=row_filter)
    self.assertEqual(partial_row_data.row_key, ROW_KEY)

    cells_returned = partial_row_data.cells
    col_fam1 = cells_returned.pop(COLUMN_FAMILY_ID1)
    # COLUMN_FAMILY_ID1 must be the only family in the result.
    self.assertEqual(len(cells_returned), 0)

    (cell1_new,) = col_fam1.pop(COL_NAME1)
    (cell3_new,) = col_fam1.pop(COL_NAME2)
    # COL_NAME1 and COL_NAME2 must be the only columns in the family.
    self.assertEqual(len(col_fam1), 0)

    for original, labeled, label in ((cell1, cell1_new, label1),
                                     (cell3, cell3_new, label2)):
        # Value and timestamp are preserved; only the label is gained.
        self.assertEqual(labeled.value, original.value)
        self.assertEqual(labeled.timestamp, original.timestamp)
        self.assertEqual(original.labels, [])
        self.assertEqual(labeled.labels, [label])
def _unlock_annotation(self, annotation_id, operation_id):
    """ Unlocks a root

    This is mainly used for cases where multiple roots need to be locked
    and locking was not successful for all of them

    :param annotation_id: uint64
    :param operation_id: str
        an id that is unique to the process asking to lock the root node
    :return: bool
        success (True when the conditional delete's filter matched)
    """
    # Same module the other row filters in this file come from.
    from google.cloud.bigtable.row_filters import ValueRegexFilter

    operation_id_b = serialize_key(operation_id)
    lock_key = serialize_key("lock")

    # Build a column filter which tests if a lock was set (== lock column
    # exists) and if it is still valid (timestamp younger than
    # LOCK_EXPIRED_TIME_DELTA) and if the given operation_id is still
    # the active lock holder
    time_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA

    # Comply to resolution of BigTables TimeRange (millisecond granularity)
    time_cutoff -= datetime.timedelta(
        microseconds=time_cutoff.microsecond % 1000)

    time_filter = TimestampRangeFilter(TimestampRange(start=time_cutoff))
    column_key_filter = ColumnQualifierRegexFilter(lock_key)
    # BUG FIX: the lock holder is stored in the cell *value*, so it must be
    # checked with a value filter.  The previous
    # ColumnQualifierRegexFilter(operation_id_b) matched column qualifiers
    # instead, so the chain could never match and the lock cell was never
    # deleted.
    value_filter = ValueRegexFilter(operation_id_b)

    # Chain these filters together (AND semantics)
    chained_filter = RowFilterChain(
        [time_filter, column_key_filter, value_filter])

    # Get conditional row using the chained filter
    root_row = self.table.row(serialize_node_id(annotation_id),
                              filter_=chained_filter)

    # Delete row if conditions are met (state == True)
    root_row.delete_cell(self.data_family_id, lock_key, state=True)

    # commit() reports whether the predicate matched; surface that as the
    # success flag the docstring promises (previously nothing was returned).
    return root_row.commit()
def build_row_filter(row_key_regex=None, column_families=None, columns=None):
    """
    Build a row filter using a combination of row keys, column families,
    or columns to retrieve.

    Args:
        row_key_regex (:obj:`str`, optional): Regular expression for
            matching row keys. Defaults to None.
        column_families (:obj:`iter` of :obj:`str`, optional): An iterable
            of column families to retrieve. Defaults to None.
        columns (:obj:`iter` of :obj:`str`, optional): An iterable of
            column names or regular expressions for matching columns.
            Defaults to None.

    Returns:
        RowFilter: The built row filter from passed in parameters. If no
            parameters, None is returned.

    Raises:
        TypeError: If an argument has the wrong type.
    """
    # BUG FIX: ``collections.Sequence`` moved to ``collections.abc`` in
    # Python 3.3 and the old alias was removed in Python 3.10.
    try:
        from collections.abc import Sequence
    except ImportError:  # Python 2 fallback
        from collections import Sequence

    if (row_key_regex is not None
            and not isinstance(row_key_regex, six.string_types)):
        raise TypeError('row_key_regex must be a str or unicode type.')
    if (column_families is not None
            and not isinstance(column_families, Sequence)):
        raise TypeError('column_families must be an iterable.')
    if columns is not None and not isinstance(columns, Sequence):
        raise TypeError('columns must be an iterable.')

    filters = []

    # Build a filter for row keys.
    if row_key_regex:
        filters.append(RowKeyRegexFilter(row_key_regex))

    # Build filters for column families; multiple families are OR'ed.
    if column_families:
        cf_filters = [ColumnRangeFilter(cf) for cf in column_families]
        filters.append(RowFilterUnion(cf_filters)
                       if len(cf_filters) > 1 else cf_filters[0])

    # Build filters for columns; multiple columns are OR'ed.
    if columns:
        col_filters = [ColumnQualifierRegexFilter(col) for col in columns]
        filters.append(RowFilterUnion(col_filters)
                       if len(col_filters) > 1 else col_filters[0])

    # Combine the category filters with AND semantics; None when nothing
    # was requested.
    if len(filters) == 1:
        return filters[0]
    return RowFilterChain(filters=filters) if filters else None
def _check_and_renew_annotation_lock_single(self, root_id, operation_id):
    """ Tests if the root is locked with the provided operation_id and
        renews the lock to reset the time stamp

    This is mainly used before executing a bulk write

    :param root_id: uint64
    :param operation_id: str
        an id that is unique to the process asking to lock the root node
    :return: bool
        success (True when operation_id still held the lock and it was
        renewed)
    """
    # Same module the other row filters in this file come from.
    from google.cloud.bigtable.row_filters import ValueRegexFilter

    operation_id_b = serialize_key(operation_id)
    lock_key = serialize_key("lock")
    new_parents_key = serialize_key("new_parents")  # NOTE(review): unused here

    # Build a column filter which tests if a lock was set (== lock column
    # exists) and if the given operation_id is still the active lock holder.
    # BUG FIX: the column filter previously matched ``operation_id_b`` as a
    # qualifier (contradicting the comment above) and the "value" filter was
    # another qualifier filter, so the chain could never match the lock
    # column.  The lock column is ``lock_key``; the holder is in the value.
    column_key_filter = ColumnQualifierRegexFilter(lock_key)
    value_filter = ValueRegexFilter(operation_id_b)

    # Chain these filters together (AND semantics)
    chained_filter = RowFilterChain([column_key_filter, value_filter])

    # Get conditional row using the chained filter
    root_row = self.table.row(serialize_node_id(root_id),
                              filter_=chained_filter)

    # BUG FIX: renewal must happen only when the filter matched, i.e. when
    # operation_id is still the lock holder (state=True applies the mutation
    # on a match).  ``state=False`` applied the write when the caller did
    # NOT hold the lock, silently stealing it.
    root_row.set_cell(self.data_family_id, lock_key, operation_id_b,
                      state=True)

    # commit() reports whether the predicate matched: True exactly when the
    # caller still held the lock and it was just renewed.
    lock_acquired = root_row.commit()

    return lock_acquired
def _filter_chain_helper(column=None, versions=None, timestamp=None,
                         filters=None):
    """Create filter chain to limit a results set.

    :type column: str
    :param column: (Optional) The column (``fam:col``) to be selected
                   with the filter.

    :type versions: int
    :param versions: (Optional) The maximum number of cells to return.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If specified, only cells returned before (or
                      at) the timestamp will be matched.

    :type filters: list
    :param filters: (Optional) List of existing filters to be extended.

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The chained filter created, or just a single filter if only
              one was needed.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to chain.
    """
    filters = [] if filters is None else filters

    if column is not None:
        # Accept bytes for the column spec and normalize to text.
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        family_id, qualifier = column.split(':')
        filters.extend([
            FamilyNameRegexFilter(family_id),
            ColumnQualifierRegexFilter(qualifier),
        ])

    if versions is not None:
        filters.append(CellsColumnLimitFilter(versions))

    # A timestamp cutoff becomes an upper-bounded time-range filter.
    time_range = _convert_to_time_range(timestamp=timestamp)
    if time_range is not None:
        filters.append(TimestampRangeFilter(time_range))

    if not filters:
        raise ValueError('Must have at least one filter.')
    if len(filters) == 1:
        return filters[0]
    return RowFilterChain(filters=filters)
def main(project_id, instance_id, table_id):
    """Exploratory Bigtable scan script: counts 'UNQS' uniques for one row-key
    prefix and prints the total.  Everything after the first ``return`` is
    dead code from earlier experiments (TX_BYTES scan, CAPS unpack, pandas
    resampling) and never runs.
    """
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)
    column_family_id = 'ST'
    a = []
    col1_filter = RowKeyRegexFilter(b'6852#88DC961302E8#201703')
    col2_filter = ValueRegexFilter('11820581')
    # col3_filter = ValueRangeFilter("0".encode('utf-8'), "1488784881000".encode('utf-8'))
    # NOTE(review): chain1 is built but never used — the read below passes
    # only col1_filter, so col2_filter has no effect.
    chain1 = RowFilterChain(filters=[col1_filter, col2_filter])
    partial_rows = table.read_rows(filter_=col1_filter)
    partial_rows.consume_all()
    uniques = set()
    # Collect the column qualifiers of the 'UNQS' family across all rows.
    for row_key, row in partial_rows.rows.items():
        key = row_key.decode('utf-8')
        print(key)
        try:
            cell = row.cells['UNQS']
            for k in cell.keys():
                uniques.add(k)
            # for key, i in cell.keys():
            #     print(key, i)
            #     a.append(key)
            # NOTE(review): dict.keys() is not subscriptable on Python 3 —
            # this line raises TypeError there, which the bare except below
            # silently swallows.  Presumably Python 2 era code; confirm.
            cell = cell[cell.keys()[0]]
        except:
            # NOTE(review): bare except hides all errors (including the
            # Python 3 issue above); only the exception type is printed.
            e = sys.exc_info()[0]
            print(e)
        # else:
        #     print(cell)
        #     for x, y in cell:
        #         print(123, x.value, y)
        #     value = cell.value.decode('utf-8')
        #     val = { "Date": cell.timestamp.strftime("%a, %d %b %Y %H:%M:%S"), "Value": float(value) }
        #     val = 123
        #     a.append(val)
        # cell = cell[cell.keys()[0]][0]
        # value = cell.value.decode('utf-8')
        # val = { "Date": cell.timestamp.strftime("%a, %d %b %Y %H:%M:%S"), "Value": float(value) }
    print("Hey, there are %d uniques online today!"
          % len(uniques))
    return
    # ------- everything below is unreachable (dead experiments) -------
    print('Scanning tables:')
    col1_filter = ColumnQualifierRegexFilter(b'TX_BYTES:([a-zA-Z0-9]{12})')
    col2_filter = ValueRegexFilter('11820581')
    col3_filter = ValueRangeFilter("0".encode('utf-8'), "1488784881000".encode('utf-8'))
    chain1 = RowFilterChain(filters=[col1_filter, col2_filter, col3_filter])
    partial_rows = table.read_rows(filter_=chain1)
    partial_rows.consume_all()
    a = []
    # Collect (timestamp, value) pairs from the 'ST' family.
    for row_key, row in partial_rows.rows.items():
        key = row_key.decode('utf-8')
        cell = row.cells[column_family_id]
        cell = cell[cell.keys()[0]][0]
        value = cell.value.decode('utf-8')
        val = { "Date": cell.timestamp.strftime("%a, %d %b %Y %H:%M:%S"), "Value": float(value) }
        a.append(val)
    # print(a)
    column_family_id = 'CAPS'
    partial_rows = table.read_rows()
    partial_rows.consume_all()
    # Unpack the second 32-bit int from each CAPS_2 cell value.
    for row_key, row in partial_rows.rows.items():
        key = row_key.decode('utf-8')
        cell = row.cells[column_family_id]['CAPS_2'][0]
        a = struct.unpack(">ll", cell.value)[1]
        print(a)
        # cell = cell[cell.keys()[0]][0]
        # value = cell.value.decode('utf-8')
        # val = { "Date": cell.timestamp.strftime("%a, %d %b %Y %H:%M:%S"), "Value": float(value) }
        # a.append(val)
    print(a)
    return
    # Pandas post-processing: 5-minute resample + time interpolation.
    incomplete_data = json_to_dataframe(a)
    # full_range = pd.date_range(incomplete_data['Date'].min(), incomplete_data['Date'].max())
    incomplete_data['Date'] = pd.to_datetime(incomplete_data['Date'])
    incomplete_data.set_index(['Date'], inplace=True)
    # problem_data = incomplete_data.sort_index().reindex(full_range)
    # print(incomplete_data.head(100))
    # print(problem_data.head(100))
    axis = incomplete_data['Value']#.plot(kind='bar')
    upsampled = axis.resample('5T').mean()
    interpolated = upsampled.interpolate(method='time')
    # print(interpolated.head(100))
    interpolated.plot(kind="line")