from google.cloud import bigtable
from google.cloud.bigtable import row_filters


def filter_limit_row_regex(project_id, instance_id, table_id):
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    # Row keys are raw bytes, so the regex is passed as bytes too; this
    # matches every key ending in "#20190501".
    rows = table.read_rows(
        filter_=row_filters.RowKeyRegexFilter(".*#20190501$".encode("utf-8")))
    for row in rows:
        print_row(row)
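
# The print_row helper isn't defined in this snippet. A minimal sketch of
# what it could look like, assuming cell values are UTF-8 text:
def print_row(row):
    print("Reading data for {}:".format(row.row_key.decode("utf-8")))
    for column_family, columns in sorted(row.cells.items()):
        print("Column family {}".format(column_family))
        for column, cells in sorted(columns.items()):
            for cell in cells:
                print("\t{}: {} @{}".format(
                    column.decode("utf-8"),
                    cell.value.decode("utf-8"),
                    cell.timestamp))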
def ScanAttributes(self,
                   subject_prefix,
                   attributes,
                   after_urn=None,
                   max_records=None,
                   token=None,
                   relaxed_order=False):
    subject_prefix = self._CleanSubjectPrefix(subject_prefix)
    after_urn = self._CleanAfterURN(after_urn, subject_prefix)

    # Turn the subject prefix into an actual regex.
    subject_prefix += ".*"
    self.security_manager.CheckDataStoreAccess(token, [subject_prefix], "rq")

    subject_filter = row_filters.RowKeyRegexFilter(
        utils.SmartStr(subject_prefix))
    latest_value = row_filters.CellsColumnLimitFilter(1)
    attribute_filters = self._GetAttributeFilterUnion(attributes)
    # Subject AND (attr1 OR attr2) AND latest_value.
    query_filter = row_filters.RowFilterChain(
        [subject_filter, attribute_filters, latest_value])

    # The API results include the start row; we want to exclude it. Appending
    # a NUL byte yields the smallest key strictly greater than after_urn.
    if after_urn is not None:
        after_urn += "\x00"
    rows_data = self.CallWithRetry(
        self.table.read_rows,
        "read",
        start_key=after_urn,
        limit=max_records,
        filter_=query_filter)

    # Ideally we should be able to stream and yield, but it seems we can't:
    # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/1812
    self.CallWithRetry(rows_data.consume_all, "read")

    results = []
    if rows_data.rows:
        for subject, row_data in rows_data.rows.items():
            subject_results = self._ReOrderRowResults(row_data)
            results.append((subject, subject_results))

    return sorted(results, key=lambda x: x[0])
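
# _GetAttributeFilterUnion is internal to the class above. A minimal sketch
# of how such an OR-of-attributes filter could be built with the public API
# (the helper name and column scheme here are illustrative, not GRR's):
def attribute_filter_union(attributes):
    filters = [row_filters.ColumnQualifierRegexFilter(utils.SmartStr(attr))
               for attr in attributes]
    if len(filters) == 1:
        return filters[0]
    # OR semantics: a cell passes if ANY attribute's column regex matches.
    return row_filters.RowFilterUnion(filters=filters)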
def get_basic_row_iterator(self):
    """Convenience function to obtain a row iterator, maybe using a prefix."""
    table = self.table
    row_filter = None
    if isinstance(self.prefix, str):
        prefix = self.prefix
        # Turn the key prefix into a regex: "prefix.*" matches all keys
        # under it.
        if not prefix.endswith(".*"):
            prefix += ".*"
        row_filter = row_filters.RowKeyRegexFilter(regex=prefix)
    partial_rows = table.read_rows(filter_=row_filter)
    return partial_rows
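
# Hypothetical usage of the iterator above, assuming `reader` is an instance
# of the enclosing class with `table` and `prefix` set:
reader.prefix = "user#1234"
for row in reader.get_basic_row_iterator():
    print(row.row_key)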
import numpy as np
from google.cloud import bigtable
from google.cloud.bigtable import row_filters
from google.oauth2 import service_account
from tqdm import tqdm

# `args`, `SERVICE_ACCOUNT_FILE`, and `SCOPES` are defined earlier in the
# script; `Trajectory` and `Info` are generated protobuf message classes.

print('Looking for the [{}] table.'.format(args.cbt_table_name))
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES)
client = bigtable.Client(args.gcp_project_id, admin=True,
                         credentials=credentials)
instance = client.instance(args.cbt_instance_id)
table = instance.table(args.cbt_table_name)
if not table.exists():
    print("Table doesn't exist.")
    exit()
else:
    print("Table found.")

# TRAINING LOOP
for i in tqdm(range(5000), "Training"):
    # QUERY TABLE FOR PARTIAL ROWS
    # The regex is anchored, so it matches exactly one row key per episode.
    regex_filter = '^cartpole_trajectory_{}$'.format(i)
    row_filter = row_filters.RowKeyRegexFilter(regex_filter)
    filtered_rows = table.read_rows(filter_=row_filter)
    for row in filtered_rows:
        bytes_traj = row.cells['trajectory']['traj'.encode()][0].value
        bytes_info = row.cells['trajectory']['info'.encode()][0].value
        traj, info = Trajectory(), Info()
        traj.ParseFromString(bytes_traj)
        info.ParseFromString(bytes_info)

        # Reshape the flat observation buffer to (num_steps, *vector_obs_spec).
        traj_shape = np.append(np.array(info.num_steps),
                               np.array(info.vector_obs_spec))
        observations = np.array(traj.vector_obs).reshape(traj_shape)

        # Stack each sequence with a copy rolled by one step, then move the
        # pair axis to position 1, giving shape (num_steps, 2, ...).
        traj_obs = np.rollaxis(
            np.array([observations, np.roll(observations, 1)]), 0, 2)
        traj_actions = np.rollaxis(
            np.array([traj.actions, np.roll(traj.actions, 1)]), 0, 2)
        traj_rewards = np.rollaxis(
            np.array([traj.rewards, np.roll(traj.rewards, 1)]), 0, 2)
        traj_discounts = np.ones((info.num_steps, 2))
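
# Note: each anchored regex above matches exactly one row key, so a direct
# point read is a possible alternative to the regex scan (sketch, same table):
row = table.read_row('cartpole_trajectory_{}'.format(i).encode())
if row is not None:
    bytes_traj = row.cells['trajectory']['traj'.encode()][0].value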