Example 1
def test_read_with_label_applied(data_table, rows_to_delete, skip_on_emulator):
    from google.cloud.bigtable.row_filters import ApplyLabelFilter
    from google.cloud.bigtable.row_filters import ColumnQualifierRegexFilter
    from google.cloud.bigtable.row_filters import RowFilterChain
    from google.cloud.bigtable.row_filters import RowFilterUnion

    row = data_table.direct_row(ROW_KEY)
    rows_to_delete.append(row)

    cell1, _, cell3, _ = _write_to_row(row, None, row, None)
    row.commit()

    # Combine a label with column 1.
    label1 = "label-red"
    label1_filter = ApplyLabelFilter(label1)
    col1_filter = ColumnQualifierRegexFilter(COL_NAME1)
    chain1 = RowFilterChain(filters=[col1_filter, label1_filter])

    # Combine a label with column 2.
    label2 = "label-blue"
    label2_filter = ApplyLabelFilter(label2)
    col2_filter = ColumnQualifierRegexFilter(COL_NAME2)
    chain2 = RowFilterChain(filters=[col2_filter, label2_filter])

    # Bring our two labeled columns together.
    row_filter = RowFilterUnion(filters=[chain1, chain2])
    partial_row_data = data_table.read_row(ROW_KEY, filter_=row_filter)
    assert partial_row_data.row_key == ROW_KEY

    cells_returned = partial_row_data.cells
    col_fam1 = cells_returned.pop(COLUMN_FAMILY_ID1)
    # Make sure COLUMN_FAMILY_ID1 was the only key.
    assert len(cells_returned) == 0

    (cell1_new, ) = col_fam1.pop(COL_NAME1)
    (cell3_new, ) = col_fam1.pop(COL_NAME2)
    # Make sure COL_NAME1 and COL_NAME2 were the only keys.
    assert len(col_fam1) == 0

    # Check that cell1 has matching values and gained a label.
    assert cell1_new.value == cell1.value
    assert cell1_new.timestamp == cell1.timestamp
    assert cell1.labels == []
    assert cell1_new.labels == [label1]

    # Check that cell3 has matching values and gained a label.
    assert cell3_new.value == cell3.value
    assert cell3_new.timestamp == cell3.timestamp
    assert cell3.labels == []
    assert cell3_new.labels == [label2]
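Stripped of the test harness, the pattern above reduces to plain filter composition. A minimal sketch, assuming a `table` handle and invented qualifiers and row key:

from google.cloud.bigtable.row_filters import (ApplyLabelFilter,
                                               ColumnQualifierRegexFilter,
                                               RowFilterChain, RowFilterUnion)

# Label the cells of each column, then union the two labeled streams.
# The label filter must come after the column filter in its chain so that
# only the matching cells pick up the label.
red = RowFilterChain(filters=[ColumnQualifierRegexFilter(b"col-a"),
                              ApplyLabelFilter("label-red")])
blue = RowFilterChain(filters=[ColumnQualifierRegexFilter(b"col-b"),
                               ApplyLabelFilter("label-blue")])
row = table.read_row(b"row-key", filter_=RowFilterUnion(filters=[red, blue]))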
Example 2
def _columns_filter_helper(columns):
    """Creates a union filter for a list of columns.

    :type columns: list
    :param columns: Iterable containing column names (as strings). Each column
                    name can be either

                      * an entire column family: ``fam`` or ``fam:``
                      * a single column: ``fam:col``

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The union filter created containing all of the matched columns.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    filters = []
    for column_family_id, column_qualifier in _get_column_pairs(columns):
        fam_filter = FamilyNameRegexFilter(column_family_id)
        if column_qualifier is not None:
            qual_filter = ColumnQualifierRegexFilter(column_qualifier)
            combined_filter = RowFilterChain(filters=[fam_filter, qual_filter])
            filters.append(combined_filter)
        else:
            filters.append(fam_filter)

    num_filters = len(filters)
    if num_filters == 0:
        raise ValueError('Must have at least one filter.')
    elif num_filters == 1:
        return filters[0]
    else:
        return RowFilterUnion(filters=filters)
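A hedged usage sketch for the helper above, which matches the one in the google-cloud-happybase table module (there, `_get_column_pairs` splits each name on the colon); the family and column names are invented:

# Everything in family "cf1", plus only the single column "cf2:greeting".
row_filter = _columns_filter_helper(["cf1", "cf2:greeting"])
# Yields a RowFilterUnion of FamilyNameRegexFilter("cf1") and a
# RowFilterChain(FamilyNameRegexFilter("cf2"),
#                ColumnQualifierRegexFilter("greeting")).
rows = table.read_rows(filter_=row_filter)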
Example 3
    def read_rows(self,
                  table_name,
                  column_family_name,
                  column_name,
                  start_key=None,
                  end_key=None,
                  end_inclusive=True):
        """Reads cells of one column from bigtable. Note that it returns the latest
        version of the cell. It is only able to read data from one column.

        Args:
            table_name (str):
            column_family_name (str):
            column_name (bytes):
            start_key (str):
            end_key (str):
            end_inclusive (bool):

        Returns:
            list[tuple[str, str]]
        """
        table = self.instance.table(table_name)
        filter_ = ColumnQualifierRegexFilter(regex=column_name)
        partial_rows = table.read_rows(start_key,
                                       end_key,
                                       filter_=filter_,
                                       end_inclusive=end_inclusive)

        def unpack_row(row):
            return row.row_key.decode('utf-8'), row.cell_value(
                column_family_name, column_name).decode('utf-8')

        return [unpack_row(row) for row in partial_rows]
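A usage sketch, assuming the method sits on a wrapper object (`store` here) that already holds a connected `instance`; the table and column names are invented. One caveat: `column_name` is fed to `ColumnQualifierRegexFilter` as a regex, so a qualifier containing regex metacharacters should be passed through `re.escape` first.

pairs = store.read_rows(table_name="metrics",
                        column_family_name="cf",
                        column_name=b"value",
                        start_key="sensor#0000",
                        end_key="sensor#9999",
                        end_inclusive=True)
for row_key, cell_value in pairs:
    print(row_key, cell_value)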
Example 4
    def test_read_with_label_applied(self):
        self._maybe_emulator_skip("Labels not supported by Bigtable emulator")
        row = self._table.row(ROW_KEY)
        self.rows_to_delete.append(row)

        cell1, _, cell3, _ = self._write_to_row(row, None, row)
        row.commit()

        # Combine a label with column 1.
        label1 = u"label-red"
        label1_filter = ApplyLabelFilter(label1)
        col1_filter = ColumnQualifierRegexFilter(COL_NAME1)
        chain1 = RowFilterChain(filters=[col1_filter, label1_filter])

        # Combine a label with column 2.
        label2 = u"label-blue"
        label2_filter = ApplyLabelFilter(label2)
        col2_filter = ColumnQualifierRegexFilter(COL_NAME2)
        chain2 = RowFilterChain(filters=[col2_filter, label2_filter])

        # Bring our two labeled columns together.
        row_filter = RowFilterUnion(filters=[chain1, chain2])
        partial_row_data = self._table.read_row(ROW_KEY, filter_=row_filter)
        self.assertEqual(partial_row_data.row_key, ROW_KEY)

        cells_returned = partial_row_data.cells
        col_fam1 = cells_returned.pop(COLUMN_FAMILY_ID1)
        # Make sure COLUMN_FAMILY_ID1 was the only key.
        self.assertEqual(len(cells_returned), 0)

        cell1_new, = col_fam1.pop(COL_NAME1)
        cell3_new, = col_fam1.pop(COL_NAME2)
        # Make sure COL_NAME1 and COL_NAME2 were the only keys.
        self.assertEqual(len(col_fam1), 0)

        # Check that cell1 has matching values and gained a label.
        self.assertEqual(cell1_new.value, cell1.value)
        self.assertEqual(cell1_new.timestamp, cell1.timestamp)
        self.assertEqual(cell1.labels, [])
        self.assertEqual(cell1_new.labels, [label1])

        # Check that cell3 has matching values and gained a label.
        self.assertEqual(cell3_new.value, cell3.value)
        self.assertEqual(cell3_new.timestamp, cell3.timestamp)
        self.assertEqual(cell3.labels, [])
        self.assertEqual(cell3_new.labels, [label2])
Example 5
    def _unlock_annotation(self, annotation_id, operation_id):
        """ Unlocks a root

        This is mainly used for cases where multiple roots need to be locked and
        locking was not sucessful for all of them

        :param annotation_id: uint64
        :param operation_id: str
            an id that is unique to the process asking to lock the root node
        :return: bool
            success
        """
        operation_id_b = serialize_key(operation_id)

        lock_key = serialize_key("lock")

        # Build a column filter which tests that a lock was set (== the lock
        # column exists), that it is still valid (timestamp newer than
        # LOCK_EXPIRED_TIME_DELTA ago), and that the given operation_id is
        # still the active lock holder

        time_cutoff = datetime.datetime.utcnow() - LOCK_EXPIRED_TIME_DELTA

        # Round down to the millisecond resolution of Bigtable's TimestampRange
        time_cutoff -= datetime.timedelta(
            microseconds=time_cutoff.microsecond % 1000)

        time_filter = TimestampRangeFilter(TimestampRange(start=time_cutoff))

        column_key_filter = ColumnQualifierRegexFilter(lock_key)

        # The lock cell's value holds the id of the current lock owner.
        value_filter = ValueRegexFilter(operation_id_b)

        # Chain these filters together
        chained_filter = RowFilterChain(
            [time_filter, column_key_filter, value_filter])

        # Get conditional row using the chained filter
        root_row = self.table.row(serialize_node_id(annotation_id),
                                  filter_=chained_filter)

        # Delete the lock cell only when the filter matched (state == True)
        root_row.delete_cell(self.data_family_id, lock_key, state=True)

        root_row.commit()
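The deletion above leans on the check-and-mutate contract of the legacy client: passing `filter_` to `table.row()` yields a conditional row, mutations staged with `state=True` are applied only when the filter matches (with `state=False`, only when it does not), and `commit()` reports whether it matched. A minimal sketch of that contract, with placeholder names:

cond_row = table.row(b"some-node", filter_=chained_filter)
cond_row.delete_cell("family-id", b"lock", state=True)  # runs only on a match
matched = cond_row.commit()  # True iff the filter matched the row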
Example 6
def build_row_filter(row_key_regex=None, column_families=None, columns=None):
    """
    Build a row filter using a combination of row keys, column families, or
    columns to retrieve.

    Args:
        row_key_regex (:obj:`str`, optional): Regular expression for matching
            row keys. Defaults to None.
        column_families (:obj:`iter` of :obj:`str`, optional): An iterable of
            column families to retrieve. Defaults to None.
        columns (:obj:`iter` of :obj:`str`, optional): An iterable of column
            names or regular expressions for matching columns. Defaults to
            None.

    Returns:
        RowFilter: The row filter built from the passed parameters, or None
            if no parameters were given.
    """
    if (row_key_regex is not None and
            not isinstance(row_key_regex, six.string_types)):
        raise TypeError('row_key_regex must be a str or unicode type.')
    if (column_families is not None and
            not isinstance(column_families, collections.abc.Sequence)):
        raise TypeError('column_families must be a sequence.')
    if (columns is not None and
            not isinstance(columns, collections.abc.Sequence)):
        raise TypeError('columns must be a sequence.')

    filters = []

    # Build a filter for row keys.
    if row_key_regex:
        row_key_filter = RowKeyRegexFilter(row_key_regex)
        filters.append(row_key_filter)

    # Build filters for column families.
    if column_families:
        cf_filters = [ColumnRangeFilter(cf) for cf in column_families]
        if len(cf_filters) > 1:
            filters.append(RowFilterUnion(cf_filters))
        else:
            filters.append(cf_filters[0])

    # Build filters for columns.
    if columns:
        col_filters = [ColumnQualifierRegexFilter(col) for col in columns]
        if len(col_filters) > 1:
            filters.append(RowFilterUnion(col_filters))
        else:
            filters.append(col_filters[0])

    if len(filters) == 1:
        return filters[0]
    else:
        return RowFilterChain(filters=filters) if filters else None
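A usage sketch with invented arguments. Note the composition rule: filters built from different parameters are ANDed together in a `RowFilterChain`, while multiple values within a single parameter are ORed in a `RowFilterUnion`:

row_filter = build_row_filter(row_key_regex="^user#",       # keys starting with "user#"
                              column_families=["profile"],  # the whole "profile" family
                              columns=["name", "email"])    # either qualifier
rows = table.read_rows(filter_=row_filter)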
Example 7
    def _check_and_renew_annotation_lock_single(self, root_id, operation_id):
        """ Tests if the root is locked with the provided operation_id and
        renews the lock to reset the time_stam

        This is mainly used before executing a bulk write

        :param root_id: uint64
        :param operation_id: str
            an id that is unique to the process asking to lock the root node
        :return: bool
            success
        """
        operation_id_b = serialize_key(operation_id)

        lock_key = serialize_key("lock")
        new_parents_key = serialize_key("new_parents")

        # Build a column filter which tests if a lock was set (== lock column
        # exists) and if the given operation_id is still the active lock holder.

        column_key_filter = ColumnQualifierRegexFilter(lock_key)
        value_filter = ValueRegexFilter(operation_id_b)

        # Chain these filters together
        chained_filter = RowFilterChain([column_key_filter, value_filter])

        # Get conditional row using the chained filter
        root_row = self.table.row(serialize_node_id(root_id),
                                  filter_=chained_filter)

        # Renew the lock by rewriting it, but only if the condition holds
        # (state == True)
        root_row.set_cell(self.data_family_id,
                          lock_key,
                          operation_id_b,
                          state=True)

        # commit() returns True when the filter matched, i.e. when the lock
        # is still held by this operation_id
        lock_acquired = root_row.commit()

        return lock_acquired
Example 8
def _filter_chain_helper(column=None,
                         versions=None,
                         timestamp=None,
                         filters=None):
    """Create filter chain to limit a results set.

    :type column: str
    :param column: (Optional) The column (``fam:col``) to be selected
                   with the filter.

    :type versions: int
    :param versions: (Optional) The maximum number of cells to return.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If specified, only cells returned before (or
                      at) the timestamp will be matched.

    :type filters: list
    :param filters: (Optional) List of existing filters to be extended.

    :rtype: :class:`~google.cloud.bigtable.row.RowFilter`
    :returns: The chained filter created, or just a single filter if only
              one was needed.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to chain.
    """
    if filters is None:
        filters = []

    if column is not None:
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        column_family_id, column_qualifier = column.split(':')
        fam_filter = FamilyNameRegexFilter(column_family_id)
        qual_filter = ColumnQualifierRegexFilter(column_qualifier)
        filters.extend([fam_filter, qual_filter])
    if versions is not None:
        filters.append(CellsColumnLimitFilter(versions))
    time_range = _convert_to_time_range(timestamp=timestamp)
    if time_range is not None:
        filters.append(TimestampRangeFilter(time_range))

    num_filters = len(filters)
    if num_filters == 0:
        raise ValueError('Must have at least one filter.')
    elif num_filters == 1:
        return filters[0]
    else:
        return RowFilterChain(filters=filters)
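A hedged usage sketch for the helper above (it also appears in google-cloud-happybase); the column name, row key, and timestamp are invented:

# Up to 3 newest versions of cf1:greeting written at or before the cutoff.
filter_ = _filter_chain_helper(column="cf1:greeting", versions=3,
                               timestamp=1488784881000)  # ms since the epoch
row = table.read_row(b"row-key", filter_=filter_)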
Example 9
import struct

import pandas as pd

from google.cloud import bigtable
from google.cloud.bigtable.row_filters import (ColumnQualifierRegexFilter,
                                               RowFilterChain,
                                               RowKeyRegexFilter,
                                               ValueRangeFilter,
                                               ValueRegexFilter)

# json_to_dataframe is a project-local helper that is not shown here.


def main(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)
    column_family_id = 'ST'

    # Match rows whose key contains this prefix (the regex is unanchored).
    col1_filter = RowKeyRegexFilter(b'6852#88DC961302E8#201703')

    partial_rows = table.read_rows(filter_=col1_filter)
    partial_rows.consume_all()

    uniques = set()
    for row_key, row in partial_rows.rows.items():
        key = row_key.decode('utf-8')
        print(key)
        try:
            cells = row.cells['UNQS']
            # Each column qualifier under the UNQS family counts as one unique.
            for qualifier in cells.keys():
                uniques.add(qualifier)
        except KeyError as exc:
            print(exc)


    print("Hey, there are %d uniques online today!" % len(uniques))

    return

    print('Scanning tables:')

    col1_filter = ColumnQualifierRegexFilter(b'TX_BYTES:([a-zA-Z0-9]{12})')
    col2_filter = ValueRegexFilter('11820581')
    col3_filter = ValueRangeFilter("0".encode('utf-8'), "1488784881000".encode('utf-8'))

    chain1 = RowFilterChain(filters=[col1_filter, col2_filter, col3_filter])

    partial_rows = table.read_rows(filter_=chain1)
    partial_rows.consume_all()

    a = []

    for row_key, row in partial_rows.rows.items():
        key = row_key.decode('utf-8')
        cell = row.cells[column_family_id]
        # dict views are not indexable in Python 3; take the first column's
        # newest cell via an iterator instead
        cell = next(iter(cell.values()))[0]
        value = cell.value.decode('utf-8')
        val = { "Date": cell.timestamp.strftime("%a, %d %b %Y %H:%M:%S"), "Value": float(value) }
        a.append(val)



    column_family_id = 'CAPS'

    partial_rows = table.read_rows()
    partial_rows.consume_all()

    for row_key, row in partial_rows.rows.items():
        key = row_key.decode('utf-8')

        cell = row.cells[column_family_id]['CAPS_2'][0]

        # CAPS_2 holds two big-endian 32-bit integers; keep the second one.
        # Use a separate name so the list `a` built above survives for the
        # DataFrame below.
        caps_value = struct.unpack(">ll", cell.value)[1]

        print(caps_value)

    print(caps_value)

    return

    incomplete_data = json_to_dataframe(a)

    # full_range = pd.date_range(incomplete_data['Date'].min(), incomplete_data['Date'].max())
    incomplete_data['Date'] = pd.to_datetime(incomplete_data['Date'])
    incomplete_data.set_index(['Date'], inplace=True)

    # problem_data = incomplete_data.sort_index().reindex(full_range)
    # print(incomplete_data.head(100))
    # print(problem_data.head(100))

    axis = incomplete_data['Value']
    upsampled = axis.resample('5T').mean()
    interpolated = upsampled.interpolate(method='time')
    # print(interpolated.head(100))

    interpolated.plot(kind="line")
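For reference, the first scan in the script above distills to a few lines. A minimal sketch reusing the same placeholder key prefix and assuming the `table` handle from `main`:

partial_rows = table.read_rows(
    filter_=RowKeyRegexFilter(b'6852#88DC961302E8#201703'))
partial_rows.consume_all()

uniques = set()
for row_key, row in partial_rows.rows.items():
    # Column qualifiers under the UNQS family double as the unique IDs.
    uniques.update(row.cells.get('UNQS', {}))

print("Hey, there are %d uniques online today!" % len(uniques))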