Example #1
0
def test_query_field_from_path():
    """Selections on path-derived fields are pushed below the decode step.

    When a SelectionOp references only fields parsed from the directory
    structure, the query is expected to be rewritten so that the selection
    wraps the 'extract_path' call inside 'decode', i.e. the file list is
    filtered without opening/decoding the files.
    """
    config = {
        "root_dir": "/",
        "pattern": "{department}",
        "filename_column": "path",
        "decode": "auto",
        "schema": TEST_SCHEMA,
    }
    adapter = DirAdapter(employees=config)

    query = SelectionOp(
        LoadOp('employees'),
        EqOp(Var('department'), Const('sales')),
    )
    loc = query_zipper(query).leftmost_descendant()

    res = adapter.evaluate(loc)
    relation = adapter.get_relation('employees')
    actual = res.root()

    # Expected tree, built inside-out: files -> extract_path -> select -> decode.
    listing = Function('files', Const(relation.root_dir))
    extracted = Function(
        'extract_path',
        listing,
        Const(relation.root_dir + "{department}"),
    )
    filtered = SelectionOp(extracted, EqOp(Var('department'), Const('sales')))
    expected = Function(
        'decode',
        filtered,
        Const('auto'),
        Const(TEST_SCHEMA),
        Const('path'),
    )

    compare(actual, expected)
Example #2
0
def test_query_field_in_payload():
    """Selections on payload fields stay outside the decode step.

    Querying a field that lives inside the file contents should result in
    the LoadOp being rewritten as

        SelectionOp(Function('decode', Function('extract_path', Function('files'))))

    so the predicate is applied to the decoded output.
    """
    config = {
        "root_dir": "/",
        "pattern": "{department}",
        "filename_column": "path",
        "decode": "auto",
        "schema": TEST_SCHEMA,
    }
    adapter = DirAdapter(employees=config)

    query = SelectionOp(
        LoadOp('employees'),
        GeOp(Var('salary'), Const(40000)),
    )
    loc = query_zipper(query).leftmost_descendant()

    res = adapter.evaluate(loc)
    relation = adapter.get_relation('employees')
    actual = res.root()

    # Expected tree, built inside-out: files -> extract_path -> decode -> select.
    listing = Function('files', Const(relation.root_dir))
    extracted = Function(
        'extract_path',
        listing,
        Const(relation.root_dir + "{department}"),
    )
    decoded = Function(
        'decode',
        extracted,
        Const('auto'),
        Const(TEST_SCHEMA),
        Const('path'),
    )
    expected = SelectionOp(decoded, GeOp(Var('salary'), Const(40000)))

    compare(actual, expected)
Example #3
0
def test_guess_schema():
    """Request the schema of a relation configured without an explicit one.

    Creates one sub-directory per department under the externally defined
    ``path``, each holding a ``data.csv`` with a header line and 1000
    identical rows, then asks the adapter for the 'employees' schema.
    """
    header = 'column1, column2, column3\n'
    row = 'a, "b", 1\n'

    for department in ('engineering', 'sales', 'marketing'):
        sub_path = os.path.join(path, department)
        os.mkdir(sub_path)
        csv_file = os.path.join(sub_path, 'data.csv')
        with open(csv_file, 'w') as f:
            f.write(header)
            f.writelines(row for _ in range(1000))

    relation_config = {
        "root_dir": path,
        "pattern": "{department}",
        "decode": "auto",
    }
    adapter = DirAdapter(employees=relation_config)

    # NOTE(review): the returned schema is never asserted on, so this only
    # checks that the call completes — confirm whether a check is missing.
    schema = adapter.schema('employees')
Example #4
0
def test_query_field_from_path_and_contents():
    """Mixed selections are split around the decode step.

    When a SelectionOp references both a field parsed from the directory
    structure and a field from the file contents, the query is expected to
    be rewritten so that the file list is filtered before opening/decoding
    the files, and the decoded output is finally filtered by the field from
    the contents.
    """
    # NOTE(review): this literal presumably mirrors TEST_SCHEMA, since the
    # expected tree below wraps TEST_SCHEMA — confirm they are identical.
    schema = {
        "fields": [
            {"type": 'STRING', "name": 'department'},
            {"type": 'INTEGER', "name": 'id'},
            {"type": 'STRING', "name": 'full_name'},
            {"type": 'INTEGER', "name": 'salary'},
            {"type": 'INTEGER', "name": 'manager_id'},
        ],
    }
    config = {
        "root_dir": "/",
        "pattern": "{department}",
        "filename_column": "path",
        "decode": "auto",
        "schema": schema,
    }
    adapter = DirAdapter(employees=config)

    predicate = And(
        EqOp(Var('department'), Const('sales')),
        GeOp(Var('salary'), Const(40000)),
    )
    query = SelectionOp(LoadOp('employees'), predicate)
    loc = query_zipper(query).leftmost_descendant()

    res = adapter.evaluate(loc)
    relation = adapter.get_relation('employees')
    actual = res.root()

    # Expected tree, inside-out:
    # files -> extract_path -> select(department) -> decode -> select(salary).
    listing = Function('files', Const(relation.root_dir))
    extracted = Function(
        'extract_path',
        listing,
        Const(relation.root_dir + "{department}"),
    )
    by_department = SelectionOp(
        extracted,
        EqOp(Var('department'), Const('sales')),
    )
    decoded = Function(
        'decode',
        by_department,
        Const('auto'),
        Const(TEST_SCHEMA),
        Const('path'),
    )
    expected = SelectionOp(decoded, GeOp(Var('salary'), Const(40000)))

    compare(actual, expected)
Example #5
0
def test_evaluate():
    """A bare LoadOp is rewritten to 'extract_path' over a 'files' listing.

    The relation is configured without a decode step, and the expected tree
    contains only the file listing and the path-pattern extraction.
    """
    song_config = {
        "root_dir": path,
        "pattern": "{artist}/{album}/{track}.{ext}",
        "filename_column": "path",
    }
    adapter = DirAdapter(songs=song_config)
    relation = adapter.get_relation('songs')

    loc = query_zipper(LoadOp('songs')).leftmost_descendant()
    res = adapter.evaluate(loc)
    actual = res.root()

    listing = Function('files', Const(relation.root_dir))
    expected = Function(
        'extract_path',
        listing,
        Const(path + "/{artist}/{album}/{track}.{ext}"),
    )

    compare(actual, expected)