Esempio n. 1
0
File: csv.py Progetto: vijoin/ibis
def csv_pre_execute_selection(
    op: ops.Node,
    client: CSVClient,
    scope: Scope,
    timecontext: TimeContext = None,
    **kwargs,
):
    """Load the CSV-backed tables under a selection that ``scope`` lacks.

    Every physical table below ``op.table`` for which
    ``scope.get_value(table, timecontext)`` is ``None`` is read from
    ``client.dictionary[table.name]`` via ``_read_csv``.  When the
    selection names columns that are all present in the file header, only
    those columns are read; otherwise the whole file is read.

    Parameters
    ----------
    op
        Selection node; ``op.table`` and ``op.selections`` are consulted.
    client
        Client whose ``dictionary`` maps table names to CSV paths.
    scope
        Existing scope, used to skip tables that already have a value.
    timecontext
        Passed through to ``scope.get_value`` and to each new ``Scope``.
    **kwargs
        Accepted for interface compatibility; unused here.

    Returns
    -------
    Scope
        A scope built by merging one ``Scope({table: result}, timecontext)``
        per table that was read (an empty ``Scope()`` if none were).
    """
    missing = (
        t
        for t in physical_tables(op.table.op())
        if scope.get_value(t, timecontext) is None
    )

    # Deliberately not called ``ops``: that name is used in the signature's
    # ``ops.Node`` annotation and shadowing it locally is confusing.
    loaded = Scope()
    for table in missing:
        path = client.dictionary[table.name]
        usecols = None

        if op.selections:
            # Read a single row just to learn which columns the file has.
            header = _read_csv(path, schema=table.schema, header=0, nrows=1)
            usecols = [
                getattr(s.op(), 'name', None) or s.get_name()
                for s in op.selections
            ]

            # We cannot read all the columns that we would like, so fall
            # back to reading everything.  ``Index.intersection`` is the
            # explicit set operation; ``&`` between Index objects is no
            # longer a set intersection in current pandas.
            available = pd.Index(usecols).intersection(header.columns)
            if len(available) != len(usecols):
                usecols = None

        result = _read_csv(path, table.schema, usecols=usecols, header=0)
        loaded = loaded.merge_scope(Scope({table: result}, timecontext))

    return loaded
Esempio n. 2
0
def csv_pre_execute(op, client, scope, **kwargs):
    """Load the CSV-backed tables under ``op`` that are not yet in ``scope``.

    For each physical table below ``op.table`` that is not a key of
    ``scope``, the file at ``client.dictionary[table.name]`` is read with
    ``pandas.read_csv``.  When ``op.selections`` names columns that are all
    present in the file header, only those columns are read; otherwise the
    whole file is read.

    Parameters
    ----------
    op
        Node exposing ``table`` and ``selections``.
    client
        Client whose ``dictionary`` maps table names to CSV paths.
    scope
        Container checked with ``table not in scope``.
    **kwargs
        Accepted for interface compatibility; unused here.

    Returns
    -------
    dict
        Mapping of each newly read table to its ``DataFrame``.
    """
    loaded = {}
    for table in physical_tables(op.table.op()):
        if table in scope:
            continue

        path = client.dictionary[table.name]
        usecols = None

        if op.selections:
            # Only the column names are needed from this probe, so no dtype
            # information is passed.  The previous ``schema=`` keyword is
            # not a ``read_csv`` parameter and raised ``TypeError``.
            header = pd.read_csv(str(path), header=0, nrows=1)
            usecols = [
                getattr(s.op(), 'name', None) or s.get_name()
                for s in op.selections
            ]

            # We cannot read all the columns that we would like, so fall
            # back to reading everything.  ``Index.intersection`` is the
            # explicit set operation; ``&`` between Index objects is no
            # longer a set intersection in current pandas.
            available = pd.Index(usecols).intersection(header.columns)
            if len(available) != len(usecols):
                usecols = None

        loaded[table] = pd.read_csv(str(path), usecols=usecols, header=0)
    return loaded
Esempio n. 3
0
File: csv.py Progetto: zdog234/ibis
def csv_pre_execute_selection(op, client, scope, **kwargs):
    """Load the CSV-backed tables under a selection that ``scope`` lacks.

    Each physical table below ``op.table`` that is not a key of ``scope``
    is read from ``client.dictionary[table.name]`` via ``_read_csv``.  When
    the selection names columns that are all present in the file header,
    only those columns are read; otherwise the whole file is read.

    Parameters
    ----------
    op
        Selection node; ``op.table`` and ``op.selections`` are consulted.
    client
        Client whose ``dictionary`` maps table names to CSV paths.
    scope
        Container checked with ``table not in scope``.
    **kwargs
        Accepted for interface compatibility; unused here.

    Returns
    -------
    dict
        Mapping of each newly read table to the value returned by
        ``_read_csv`` for it.
    """
    missing = (t for t in physical_tables(op.table.op()) if t not in scope)

    loaded = {}
    for table in missing:
        path = client.dictionary[table.name]
        usecols = None

        if op.selections:
            # Read a single row just to learn which columns the file has.
            header = _read_csv(path, schema=table.schema, header=0, nrows=1)
            usecols = [
                getattr(s.op(), 'name', None) or s.get_name()
                for s in op.selections
            ]

            # We cannot read all the columns that we would like, so fall
            # back to reading everything.  ``Index.intersection`` is the
            # explicit set operation; ``&`` between Index objects is no
            # longer a set intersection in current pandas.
            available = pd.Index(usecols).intersection(header.columns)
            if len(available) != len(usecols):
                usecols = None

        loaded[table] = _read_csv(
            path, table.schema, usecols=usecols, header=0
        )

    return loaded
Esempio n. 4
0
File: csv.py Progetto: cloudera/ibis
def csv_pre_execute_selection(op, client, scope, **kwargs):
    """Read CSV files for the selection's tables that are absent from scope.

    Walks the physical tables below ``op.table``; any table that is not a
    key of ``scope`` is loaded from ``client.dictionary[table.name]`` with
    ``_read_csv``.  If ``op.selections`` is non-empty and every selected
    column name appears in the file header, the read is restricted to
    those columns; in all other cases the full file is read.

    Parameters
    ----------
    op
        Selection node providing ``table`` and ``selections``.
    client
        Client whose ``dictionary`` maps table names to CSV paths.
    scope
        Container checked with ``t not in scope``.
    **kwargs
        Accepted for interface compatibility; unused here.

    Returns
    -------
    dict
        ``{table: _read_csv(...)}`` for every table that was loaded.
    """
    results = {}
    for table in physical_tables(op.table.op()):
        if table in scope:
            continue

        path = client.dictionary[table.name]
        usecols = None

        if op.selections:
            # One-row probe: only the header's column names are used below.
            header = _read_csv(path, schema=table.schema, header=0, nrows=1)
            requested = [
                getattr(s.op(), 'name', None) or s.get_name()
                for s in op.selections
            ]

            # Restrict the read only when every requested column exists in
            # the file.  ``Index.intersection`` replaces the ``&`` operator,
            # which current pandas no longer treats as a set intersection
            # on Index objects.
            found = pd.Index(requested).intersection(header.columns)
            if len(found) == len(requested):
                usecols = requested

        results[table] = _read_csv(
            path, table.schema, usecols=usecols, header=0
        )

    return results