def run_query(self, table_name, column_names, limit, offset, startrow,
              endrow, getOnlyKeys, start_inclusive, end_inclusive):
    """Scans a row range and collects the keys or values of its cells.

    Args:
      table_name: Name of the table to scan.
      column_names: Columns handed to the scan spec.
      limit: Number of entries wanted after the offset; the scan spec is
        given limit + offset as its limit argument.
      offset: Number of leading cells to skip before collecting.
      startrow: Row key at which the scan starts.
      endrow: Row key at which the scan ends.
      getOnlyKeys: If true, collect each cell's row key instead of its value.
      start_inclusive: Whether startrow itself belongs to the range.
      end_inclusive: Whether endrow itself belongs to the range.
    Returns:
      A list whose first element is the status string (ERROR_HT, with a
      message appended when the scan failed), followed by the collected
      keys or values. Entries gathered before a failure are kept.
    """
    starttime = time.time()
    elist = [ERROR_HT]
    client = None
    try:
        client = self.__initConnection()
        row_intervals = [
            ttypes.RowInterval(startrow, start_inclusive,
                               endrow, end_inclusive)
        ]
        cell_intervals = None
        include_deletes = 0
        scan_spec = ttypes.ScanSpec(row_intervals,
                                    cell_intervals,
                                    include_deletes,
                                    1,
                                    limit + offset,
                                    0,
                                    None,
                                    column_names)
        cells = client.get_cells(self.ns, table_name, scan_spec)
        for position, cell in enumerate(cells, 1):
            # The first `offset` cells are skipped, the rest are collected.
            if position <= offset:
                continue
            if getOnlyKeys:
                # NOTE(review): the other scans in this file read
                # cell.key.row; confirm row_key exists on this cell type.
                elist.append(cell.row_key)
            else:
                elist.append(cell.value)
    except Exception:
        # Failures are reported through the status element rather than
        # raised. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still propagate.
        elist[0] += "Exception thrown while running a scanner"
    finally:
        # Cleanup lives in finally so the connection is released on every
        # exit path, including when the profiling log itself fails.
        try:
            endtime = time.time()
            if PROFILING:
                self.logger.debug(
                    "HT RUN_QUERY: %s" % str(endtime - starttime))
        finally:
            self.__closeConnection(client)
    return elist
def batch_get_entity(self, table_name, row_keys, column_names):
    """Allows access to multiple rows with a single call.

    Args:
      table_name: A str, the table to access.
      row_keys: A list of keys to access.
      column_names: A list of columns to access.
    Raises:
      TypeError: Bad argument types.
    Returns:
      A dictionary of {key:{column_name:value,...}}. Every requested key is
      present; keys for which no cell came back map to an empty dictionary.
      An empty row_keys list yields an empty dictionary.
    """
    if not isinstance(table_name, str):
        raise TypeError("Expected str")
    if not isinstance(column_names, list):
        raise TypeError("Expected list")
    if not isinstance(row_keys, list):
        raise TypeError("Expected list")

    # With no keys there is nothing to fetch. Building the scan anyway would
    # produce a spec with no row intervals, i.e. one that is not restricted
    # to any requested key (presumably a whole-table scan -- confirm against
    # the Hypertable documentation).
    if not row_keys:
        return {}

    row_keys = [self.__encode(row) for row in row_keys]

    # One single-row interval (inclusive on both ends) per requested key.
    row_intervals = [
        ttypes.RowInterval(row, True, row, True) for row in row_keys
    ]
    cell_intervals = None
    include_deletes = False
    scan_spec = ttypes.ScanSpec(row_intervals,
                                cell_intervals,
                                include_deletes,
                                1,
                                0,
                                0,
                                None,
                                column_names)
    res = self.conn.get_cells(self.ns, table_name, scan_spec)

    ret = {}
    for cell in res:
        # Decode once per cell; cells of the same row share one dictionary.
        row_key = self.__decode(cell.key.row)
        ret.setdefault(row_key, {})[cell.key.column_family] = cell.value

    # If nothing was returned for a requested row, put in an empty value.
    for row in row_keys:
        ret.setdefault(self.__decode(row), {})

    return ret
def range_query(self, table_name, column_names, start_key, end_key, limit,
                offset=0, start_inclusive=True, end_inclusive=True,
                keys_only=False):
    """Gets a range of rows between two keys.

    Returns a list of dictionaries [{key: {column1: value1, ...}}, ...], or
    a list of keys if keys_only is set, in the order the scan returns them.

    Args:
      table_name: Name of table to access.
      column_names: Columns which get returned within the key range.
      start_key: String for which the query starts at.
      end_key: String for which the query ends at.
      limit: Maximum number of results to return.
      offset: Number to cut off from the results [offset:]. It is applied
        after the limit, so at most limit - offset results come back.
      start_inclusive: Boolean if results should include the start_key.
      end_inclusive: Boolean if results should include the end_key.
      keys_only: Boolean if to return only keys and not values.
    Raises:
      TypeError: Bad argument types.
    Returns:
      List of results.
    """
    if not isinstance(table_name, str):
        raise TypeError("Expected str")
    if not isinstance(column_names, list):
        raise TypeError("Expected list")
    if not isinstance(start_key, str):
        raise TypeError("Expected str")
    if not isinstance(end_key, str):
        raise TypeError("Expected str")
    if not isinstance(limit, int) and not isinstance(limit, long):
        raise TypeError("Expected int or long")
    if not isinstance(offset, int):
        raise TypeError("Expected int")

    start_key = self.__encode(start_key)
    end_key = self.__encode(end_key)

    # Ask for one extra row per excluded bound, so that dropping a bound
    # row below still leaves up to `limit` results.
    row_count = limit
    if not start_inclusive:
        row_count += 1
    if not end_inclusive:
        row_count += 1

    row_intervals = [
        ttypes.RowInterval(start_key, start_inclusive,
                           end_key, end_inclusive)
    ]
    cell_intervals = None
    include_deletes = False
    scan_spec = ttypes.ScanSpec(row_intervals,
                                cell_intervals,
                                include_deletes,
                                1,  # max revisions
                                row_count,
                                0,
                                None,
                                column_names)
    res = self.conn.get_cells(self.ns, table_name, scan_spec)

    results = []
    # Row keys exactly as returned by the scan, parallel to `results`. The
    # bound checks below use these so they compare like with like: the
    # encoded bounds against the still-encoded row keys.
    raw_rows = []
    current_cols = None
    for cell in res:
        if raw_rows and cell.key.row == raw_rows[-1]:
            # Another column of the row already at the end of the list.
            if not keys_only:
                current_cols[cell.key.column_family] = cell.value
            continue

        # First cell of a new row.
        raw_rows.append(cell.key.row)
        row_key = self.__decode(cell.key.row)
        if keys_only:
            results.append(row_key)
        else:
            current_cols = {cell.key.column_family: cell.value}
            results.append({row_key: current_cols})

    # Drop a bound row only on an exact key match. A substring or
    # decoded-vs-encoded comparison could discard a legitimate result or
    # miss the bound row.
    if not start_inclusive and raw_rows and raw_rows[0] == start_key:
        raw_rows = raw_rows[1:]
        results = results[1:]

    if not end_inclusive and raw_rows and raw_rows[-1] == end_key:
        raw_rows = raw_rows[:-1]
        results = results[:-1]

    if len(results) > limit:
        results = results[:limit]

    # Slicing past the end yields an empty list, so an offset larger than
    # the result count correctly returns nothing.
    if offset != 0:
        results = results[offset:]

    return results