def do_get(self, context, ticket: fl.Ticket):
    """Stream the asset requested by ``ticket`` after applying its actions.

    The ticket payload is parsed with ``AFMTicket.fromJSON``. The asset is
    resolved from the configuration at ``self.config_path``; its data comes
    either from the asset's own flight connection or from
    ``self._read_asset``. The asset's actions are then applied to both the
    schema and the batches.

    Args:
        context: Flight call context, forwarded to the asset's flight
            connection when that path is taken.
        ticket: Flight ticket whose ``ticket`` attribute holds the JSON
            request.

    Returns:
        An ``fl.GeneratorStream`` over the transformed schema and batches.

    Raises:
        ValueError: If the parsed ticket has ``columns`` set to ``None``.
    """
    ticket_info: AFMTicket = AFMTicket.fromJSON(ticket.ticket)
    requested_columns = ticket_info.columns
    if requested_columns is None:
        raise ValueError("Columns must be specified in ticket")

    log_fields = {
        'ticket': ticket.ticket,
        DataSetID: ticket_info.asset_name,
    }
    logger.info('retrieving dataset', extra=log_fields)

    # Resolve the asset while the configuration is open.
    with Config(self.config_path) as config:
        asset = asset_from_config(
            config,
            ticket_info.asset_name,
            partition_path=ticket_info.partition_path,
        )

    if asset.connection_type != "flight":
        schema, batches = self._read_asset(asset, requested_columns)
    else:
        schema, batches = asset.flight.do_get(context, ticket)
        # A non-empty column list is registered as an extra action so it is
        # applied by the transform calls below.
        if requested_columns:
            column_filter = actions.FilterColumns(
                columns=requested_columns,
                description="filter columns",
                options=None,
            )
            asset.add_action(column_filter)

    asset_actions = asset.actions
    schema = transform_schema(asset_actions, schema)
    batches = transform(asset_actions, batches)
    return fl.GeneratorStream(schema, batches)
def do_get(self, context, ticket):
    """Serve a fixed one-column table as a generator stream.

    Neither ``context`` nor ``ticket`` is consulted. The table has a single
    column ``a`` holding ``[-10, -5, 0, 5, 10]``; the batches streamed are
    whatever ``self.number_batches`` produces for that table.

    Returns:
        A ``flight.GeneratorStream`` using the table's schema.
    """
    values = pa.array([-10, -5, 0, 5, 10])
    table = pa.Table.from_arrays([values], names=['a'])
    schema = table.schema
    batches = self.number_batches(table)
    return flight.GeneratorStream(schema, batches)
def do_get(self, context, ticket):
    """Return a stream of two tables whose column ``a`` differs in type.

    Two single-column tables are built, one ``int32`` and one ``float64``,
    and both are handed to a ``flight.GeneratorStream`` declared with
    ``self.schema``. Neither ``context`` nor ``ticket`` is consulted.
    Presumably this exercises a schema mismatch inside the stream; confirm
    against the caller.

    Returns:
        A ``flight.GeneratorStream`` over ``[table1, table2]``.

    Raises:
        AssertionError: If the two arrays share a type, or if the first
            table's schema does not equal ``self.schema``.
    """
    data1 = [pa.array([-10, -5, 0, 5, 10], type=pa.int32())]
    data2 = [pa.array([-10.0, -5.0, 0.0, 5.0, 10.0], type=pa.float64())]
    # data1/data2 are one-element lists, and a list has no ``.type``;
    # compare the types of the arrays they contain.
    assert data1[0].type != data2[0].type
    table1 = pa.Table.from_arrays(data1, names=['a'])
    table2 = pa.Table.from_arrays(data2, names=['a'])
    assert table1.schema == self.schema
    return flight.GeneratorStream(self.schema, [table1, table2])
def do_get(self, context, ticket):
    """Stream three fixed columns of the dataset selected by the ticket.

    The dataset is looked up in ``self.datasets`` using ``ticket.ticket``
    as the key. The schema is taken from the first batch, and that batch
    is then put back in front of the remaining ones so a single scan
    serves both purposes.

    Args:
        context: Flight call context (unused).
        ticket: Flight ticket; ``ticket.ticket`` is the dataset key.

    Returns:
        An ``fl.GeneratorStream`` over the batches of the selected columns.

    Raises:
        KeyError: If a dict-like ``self.datasets`` has no entry for the
            ticket.
        StopIteration: If the scan yields no batches, since the schema is
            read from the first batch.
    """
    import itertools

    dataset = self.datasets[ticket.ticket]
    columns = ["canonical_ID", "enumerated_smiles", "achiral_fp"]

    batches = dataset.to_batches(columns=columns)
    head = next(batches)
    schema = head.schema
    print('schema done')

    # Re-attach the peeked batch instead of starting a second scan.
    return fl.GeneratorStream(schema, itertools.chain([head], batches))
def do_get(self, context, ticket: fl.Ticket):
    """Stream the asset requested by ``ticket`` after applying its actions.

    The ticket payload is parsed with ``AFMTicket.fromJSON``. The asset is
    resolved from the configuration at ``self.config_path`` and read
    through ``self._read_asset``. The asset's actions are then applied to
    both the schema and the batches.

    Args:
        context: Flight call context (unused).
        ticket: Flight ticket whose ``ticket`` attribute holds the JSON
            request.

    Returns:
        An ``fl.GeneratorStream`` over the transformed schema and batches.

    Raises:
        ValueError: If the parsed ticket has ``columns`` set to ``None``.
    """
    ticket_info: AFMTicket = AFMTicket.fromJSON(ticket.ticket)
    requested_columns = ticket_info.columns
    if requested_columns is None:
        raise ValueError("Columns must be specified in ticket")

    # Resolve the asset while the configuration is open.
    with Config(self.config_path) as config:
        asset = asset_from_config(config, ticket_info.asset_name)

    raw_schema, raw_batches = self._read_asset(asset, requested_columns)
    asset_actions = asset.actions
    schema = transform_schema(asset_actions, raw_schema)
    batches = transform(asset_actions, raw_batches)
    return fl.GeneratorStream(schema, batches)
def do_get(self, context, ticket):
    """Stream the output of ``self.slow_stream`` under a one-column schema.

    Neither ``context`` nor ``ticket`` is consulted. The declared schema
    has a single ``int32`` field named ``a``.

    Returns:
        A ``flight.GeneratorStream`` wrapping ``self.slow_stream()``.
    """
    schema = pa.schema([('a', pa.int32())])
    stream = self.slow_stream()
    return flight.GeneratorStream(schema, stream)
def do_get(self, context, ticket):
    """Stream ``self.last_message`` back, split with ``chunksize=1024``.

    Neither ``context`` nor ``ticket`` is consulted.

    Returns:
        A ``flight.GeneratorStream`` using the message's own schema.
    """
    message = self.last_message
    schema = message.schema
    # NOTE(review): if ``last_message`` is a pyarrow Table, the ``chunksize``
    # keyword was renamed ``max_chunksize`` in newer releases; confirm
    # against the pyarrow version in use.
    batches = message.to_batches(chunksize=1024)
    return flight.GeneratorStream(schema, batches)