Ejemplo n.º 1
0
    def do_get(self, context, ticket: fl.Ticket):
        """Serve the dataset named in ``ticket`` as a Flight stream.

        The ticket payload is decoded with ``AFMTicket.fromJSON``. The asset
        is then looked up from the configuration at ``self.config_path``
        using the ticket's asset name and partition path.

        Args:
            context: Flight call context; only forwarded to the upstream
                ``asset.flight.do_get`` when the asset is itself a Flight
                source.
            ticket: Flight ticket whose ``ticket`` attribute carries the
                JSON-encoded ``AFMTicket``.

        Returns:
            An ``fl.GeneratorStream`` built from the schema and batches after
            ``transform_schema`` / ``transform`` have been applied with the
            asset's actions.

        Raises:
            ValueError: If the decoded ticket has ``columns`` set to ``None``.
        """
        request: AFMTicket = AFMTicket.fromJSON(ticket.ticket)
        wanted_columns = request.columns
        if wanted_columns is None:
            raise ValueError("Columns must be specified in ticket")

        log_fields = {
            'ticket': ticket.ticket,
            DataSetID: request.asset_name,
        }
        logger.info('retrieving dataset', extra=log_fields)

        with Config(self.config_path) as config:
            asset = asset_from_config(config,
                                      request.asset_name,
                                      partition_path=request.partition_path)

        if asset.connection_type != "flight":
            schema, batches = self._read_asset(asset, wanted_columns)
        else:
            # The upstream Flight source receives the original ticket as-is,
            # so column selection is registered as an extra asset action
            # (skipped when the column list is empty).
            schema, batches = asset.flight.do_get(context, ticket)
            if wanted_columns:
                column_filter = actions.FilterColumns(
                    columns=wanted_columns,
                    description="filter columns",
                    options=None)
                asset.add_action(column_filter)

        return fl.GeneratorStream(
            transform_schema(asset.actions, schema),
            transform(asset.actions, batches))
Ejemplo n.º 2
0
 def do_get(self, context, ticket):
     """Stream a fixed one-column table via ``self.number_batches``.

     ``context`` and ``ticket`` are not used: every call builds the same
     table with a single column ``'a'`` holding ``[-10, -5, 0, 5, 10]``.

     Returns:
         A ``flight.GeneratorStream`` declared with the table's schema and
         fed by ``self.number_batches(table)``.
     """
     values = pa.array([-10, -5, 0, 5, 10])
     table = pa.Table.from_arrays([values], names=['a'])
     schema = table.schema
     batches = self.number_batches(table)
     return flight.GeneratorStream(schema, batches)
Ejemplo n.º 3
0
    def do_get(self, context, ticket):
        """Return a stream whose second table does not match ``self.schema``.

        Two single-column tables named ``'a'`` are built, one from an int32
        array and one from a float64 array. The stream is declared with
        ``self.schema`` (asserted equal to the int32 table's schema), so the
        float64 table is inconsistent with it -- presumably to exercise the
        schema-mismatch handling of the stream consumer.

        ``context`` and ``ticket`` are not used.

        Returns:
            A ``flight.GeneratorStream`` over ``[table1, table2]``.

        Raises:
            AssertionError: If the two arrays unexpectedly share a type, or
                if ``self.schema`` differs from the int32 table's schema.
        """
        data1 = [pa.array([-10, -5, 0, 5, 10], type=pa.int32())]
        data2 = [pa.array([-10.0, -5.0, 0.0, 5.0, 10.0], type=pa.float64())]
        # data1/data2 are one-element lists; the type lives on the array
        # inside, not on the list (``list.type`` raises AttributeError).
        assert data1[0].type != data2[0].type
        table1 = pa.Table.from_arrays(data1, names=['a'])
        table2 = pa.Table.from_arrays(data2, names=['a'])
        assert table1.schema == self.schema

        return flight.GeneratorStream(self.schema, [table1, table2])
Ejemplo n.º 4
0
    def do_get(self, context, ticket):
        """Stream three fixed columns of the dataset selected by the ticket.

        The dataset is looked up in ``self.datasets`` by ``ticket.ticket``.
        The first batch is pulled eagerly to learn the projected schema and
        is then placed back at the front of the stream, so the dataset is
        scanned only once and no batch is discarded.

        Args:
            context: Flight call context (unused).
            ticket: Flight ticket; ``ticket.ticket`` is the key into
                ``self.datasets``.

        Returns:
            An ``fl.GeneratorStream`` over the projected batches.

        Raises:
            KeyError: If ``self.datasets`` is a plain mapping and has no
                entry for ``ticket.ticket``.
            StopIteration: If the scan yields no batches at all, since the
                schema is taken from the first batch.
        """
        # Local import: the file's top-level import block is not visible
        # from this method.
        import itertools

        columns = ["canonical_ID", "enumerated_smiles", "achiral_fp"]
        dataset = self.datasets[ticket.ticket]

        batches = dataset.to_batches(columns=columns)
        head = next(batches)
        schema = head.schema
        print('schema done')

        # Re-attach the peeked batch instead of starting a second scan.
        return fl.GeneratorStream(schema, itertools.chain([head], batches))
Ejemplo n.º 5
0
    def do_get(self, context, ticket: fl.Ticket):
        """Serve the dataset named in ``ticket`` as a Flight stream.

        The ticket payload is decoded with ``AFMTicket.fromJSON``, the asset
        is loaded from the configuration at ``self.config_path``, and the
        requested columns are read through ``self._read_asset``.

        Args:
            context: Flight call context (unused).
            ticket: Flight ticket whose ``ticket`` attribute carries the
                JSON-encoded ``AFMTicket``.

        Returns:
            An ``fl.GeneratorStream`` built from the schema and batches after
            ``transform_schema`` / ``transform`` have been applied with the
            asset's actions.

        Raises:
            ValueError: If the decoded ticket has ``columns`` set to ``None``.
        """
        request: AFMTicket = AFMTicket.fromJSON(ticket.ticket)
        if request.columns is None:
            raise ValueError("Columns must be specified in ticket")

        with Config(self.config_path) as config:
            asset = asset_from_config(config, request.asset_name)

        raw_schema, raw_batches = self._read_asset(asset, request.columns)

        return fl.GeneratorStream(
            transform_schema(asset.actions, raw_schema),
            transform(asset.actions, raw_batches))
Ejemplo n.º 6
0
 def do_get(self, context, ticket):
     """Stream whatever ``self.slow_stream()`` produces.

     ``context`` and ``ticket`` are not used. The stream is declared with a
     single int32 column named ``'a'``.

     Returns:
         A ``flight.GeneratorStream`` over ``self.slow_stream()``.
     """
     schema = pa.schema([('a', pa.int32())])
     stream = self.slow_stream()
     return flight.GeneratorStream(schema, stream)
Ejemplo n.º 7
0
 def do_get(self, context, ticket):
     """Stream ``self.last_message`` back in batches.

     ``context`` and ``ticket`` are not used.

     Returns:
         A ``flight.GeneratorStream`` declared with the schema of
         ``self.last_message`` and fed by its ``to_batches`` output.
     """
     schema = self.last_message.schema
     # NOTE(review): if last_message is a pyarrow Table, newer pyarrow
     # releases name this keyword ``max_chunksize`` -- confirm against the
     # pinned pyarrow version.
     batches = self.last_message.to_batches(chunksize=1024)
     return flight.GeneratorStream(schema, batches)