def test_select():
    dataset = mock_data_set()

    qb = QueryBuilder(dataset)
    eq_(qb.column_exps, '*')
    # select() returns a new builder rather than mutating in place
    qb_w_select = qb.select('x,y')

    assert_is_not(qb, qb_w_select)
    eq_(qb_w_select.column_exps, 'x,y')

    qb_w_select_and_from = qb_w_select.frm('bogus')

    q = qb_w_select_and_from.query

    assert_is_instance(q, Query)

    assert_sequence_equal(q.schema.fields, [
        Field(name="x", type="INTEGER", schema_name="bogus"),
        Field(name="y", type="INTEGER", schema_name="bogus")
    ])

    compare(q.operations, ProjectionOp(LoadOp('bogus'), Var('x'), Var('y')))

    qb_select_y_from_bogus = qb.select('y').frm('bogus')
    eq_(qb_select_y_from_bogus.column_exps, 'y')

    assert_sequence_equal(
        qb_select_y_from_bogus.query.schema.fields,
        [Field(name="y", type="INTEGER", schema_name="bogus")])

    compare(qb_select_y_from_bogus.query.operations,
            ProjectionOp(LoadOp('bogus'), Var('y')))
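
These tests rely on a mock_data_set() helper that isn't shown in this listing. A minimal sketch, inferring the relation schemas from the assertions above (and from the employees fields used in a later example); the DictAdapter import path is an assumption:

import splicer
from splicer.adapters.dict_adapter import DictAdapter  # import path assumed

def mock_data_set():
    # relations and field types inferred from the test assertions;
    # rows are left empty since the tests only inspect schemas and plans
    dataset = splicer.DataSet()
    dataset.add_adapter(DictAdapter(
        bogus=dict(
            schema=dict(fields=[
                dict(name="x", type="INTEGER"),
                dict(name="y", type="INTEGER")
            ]),
            rows=[]
        ),
        employees=dict(
            schema=dict(fields=[
                dict(name="employee_id", type="INTEGER"),
                dict(name="full_name", type="STRING")
            ]),
            rows=[]
        )
    ))
    return dataset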
Example #2
def test_dict_adapter_with_schemas():
  adapter = DictAdapter(
    employees = dict(
      schema = dict(
        fields=[
          dict(name="employee_id", type="INTEGER"),
          dict(name="full_name", type="STRING"),
          dict(name="employment_date", type="DATE"),
          dict(name="manager_id", type="INTEGER")
        ]
      ),
      rows = employee_records
    )
  )

  employees = adapter.get_relation('employees')

  assert_sequence_equal(
    employees.schema.fields,
    [
      Field(name="employee_id", type="INTEGER", schema_name="employees"),
      Field(name="full_name", type="STRING", schema_name="employees"),
      Field(name="employment_date", type="DATE", schema_name="employees"),
      Field(name="manager_id", type="INTEGER", schema_name="employees")  
    ]
  )

  assert_sequence_equal(
    list(employees),
    [
      (1234, 'Tom Tompson', date(2009, 1, 17), None),
      (4567, 'Sally Sanders', date(2010, 2, 24), 1234),
      (8901, 'Mark Markty', date(2010, 3, 1), 1234)
    ]
  )
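
The employee_records fixture isn't included above, but the row assertions pin down its contents exactly; a matching definition (tuple rows assumed) would be:

from datetime import date

# rows in schema order: employee_id, full_name, employment_date, manager_id
employee_records = [
  (1234, 'Tom Tompson', date(2009, 1, 17), None),   # no manager
  (4567, 'Sally Sanders', date(2010, 2, 24), 1234),
  (8901, 'Mark Markty', date(2010, 3, 1), 1234)
]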
Example #3
def test_decode_csv():
    stream = StringIO(u"field1,field2,field3\nfoo,1,0\nbaz,2,0")

    schema = codecs.schema_from(stream, mime_type='text/csv')

    expected = Schema([
        Field(name='field1', type='STRING'),
        Field(name='field2', type='STRING'),
        Field(name='field3', type='STRING')
    ])

    eq_(schema, expected)
Example #4
  def __init__(self, name, root_dir, **options):
    self.name = name

    # normalize root_dir so path joins can assume a trailing slash
    if not root_dir.endswith('/'):
      root_dir += '/'
    self.root_dir = root_dir

    self.pattern = options.pop('pattern', None)

    if self.pattern:
      # each column named in the pattern becomes a STRING field
      tokens = tokenize_pattern(self.pattern)
      self.path_schema = Schema([
        Field(name=c, type="STRING")
        for c in columns(tokens)
      ])

    self.content_column = options.pop('content_column', None)
    self.filename_column = options.pop('filename_column', None)

    self.decode = options.pop('decode', "none")

    schema = options.pop('schema', None)
    if isinstance(schema, Schema):
      self.schema = schema
    else:
      # allow a plain dict description, or None for no explicit schema
      self.schema = schema and Schema(**schema)

    if options:
      raise ValueError("Unrecognized options {}".format(options.keys()))
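
The snippet above omits the class it belongs to. A hypothetical construction call, using FileAdapter as a stand-in name and an invented pattern syntax, shows which options the initializer recognizes:

# FileAdapter and the pattern value are stand-ins for illustration only
adapter = FileAdapter(
  'logs',                           # name
  '/var/data/logs',                 # root_dir; '/' is appended if missing
  pattern='{year}/{month}/*.log',   # pattern columns become STRING fields
  content_column='contents',
  filename_column='path',
  decode='none',
  schema=dict(fields=[dict(name='contents', type='STRING')])
)
# any other keyword raises ValueError("Unrecognized options ...")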
Example #5
    def test_index_repeating_scalar(self):
        field = Field(name="count", type="INTEGER", mode="REPEATED")

        results = list(features.index_repeating_scalar(field, [1, 2, 3], ''))

        assert_sequence_equal(results, [('count', 1), ('count', 2),
                                        ('count', 3)])
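
The features.index_* helpers aren't shown in this listing; inferred purely from the expected output here and in the surrounding examples, minimal sketches could be:

def index_scalar(field, value, prefix):
    # one (path, value) pair; prefix carries any enclosing record path
    yield (prefix + field.name, value)

def index_repeating_scalar(field, values, prefix):
    # a REPEATED field emits one pair per element under the same key
    for value in values:
        yield (prefix + field.name, value)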
Example #6
    def test_index_record(self):
        field = Field(name="point",
                      type="RECORD",
                      fields=[
                          dict(name="x", type="INTEGER"),
                          dict(name="y", type="INTEGER")
                      ])

        results = list(features.index_record(field, dict(x=10, y=1), ''))

        assert_sequence_equal(results, [('point.x', 10), ('point.y', 1)])
Example #7
def test_decode_csv():
  stream = StringIO("field1,field2,field3\nfoo,1,0\nbaz,2,0")

  schema = codecs.schema_from(stream, mime_type='text/csv')

  eq_(
    schema,
    Schema([
      Field(name='field1', type='STRING'),
      Field(name='field2', type='STRING'),
      Field(name='field3', type='STRING')
    ])
  )

  # reading the same stream again returns just the data rows, all as strings
  relation = codecs.relation_from(stream, mime_type='text/csv')

  assert_sequence_equal(
    list(relation),
    [
      ['foo','1','0'],
      ['baz','2','0']
    ]
  )
Example #8
0
    def test_repeating_index_record(self):
        field = Field(name="point",
                      type="RECORD",
                      mode="REPEATED",
                      fields=[
                          dict(name="x", type="INTEGER"),
                          dict(name="y", type="INTEGER")
                      ])

        results = list(
            features.index_repeating_record(
                field, [dict(x=1, y=1), dict(x=2, y=2)], ''))

        assert_sequence_equal(results, [('point.x', 1), ('point.y', 1),
                                        ('point.x', 2), ('point.y', 2)])
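
The record variants compose the scalar ones by extending the dotted path; a sketch consistent with the expected ('point.x', ...) keys, assuming Field exposes its subfields as Field objects and that subfields are scalars as in these tests:

def index_record(field, record, prefix):
    # recurse into each subfield, extending the path with the record name
    for subfield in field.fields:
        for pair in index_scalar(subfield, record[subfield.name],
                                 prefix + field.name + '.'):
            yield pair

def index_repeating_record(field, records, prefix):
    # flatten a list of records into one stream of (path, value) pairs
    for record in records:
        for pair in index_record(field, record, prefix):
            yield pair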
Example #9
def test_projection_and_selection():
    dataset = mock_data_set()

    qb = QueryBuilder(dataset).select('full_name').frm('employees').where(
        'employee_id = 123')

    query = qb.query
    assert_equal(
        query.operations,
        ProjectionOp(
            SelectionOp(LoadOp('employees'),
                        EqOp(Var('employee_id'), NumberConst(123))),
            Var('full_name')))

    assert_sequence_equal(
        query.schema.fields,
        [Field(name="full_name", type="STRING", schema_name="employees")])
Example #10
def dataset(source_control, docker_client, containers):
    dataset = splicer.DataSet()
    dataset.add_aggregate(
        "maxwhen",
        func=maxwhen,
        returns=Field(name="min", type="STRING"),
        initial=(None, None),
        finalize=lambda state: state[0]
    )

    # data collected at the start of the program
    static_data = DictAdapter(
        branch=dict(
            schema=dict(fields=COMMIT_SCHEMA),
            rows=branches(source_control)
        ),
        image=dict(
            schema=dict(fields=IMAGE_SCHEMA),
            rows=images(docker_client, containers)
        )
    )

    dataset.add_adapter(static_data)

    # splicer doesn't have SELECT DISTINCT yet; use GROUP BY instead
    query = """
    select branch.branch, image, maxwhen(branch.commit,
           branch.rel_commit) as commit,
           max(branch.rel_commit) as rel_commit
    from image join branch on image.tag = branch.commit
    group by branch, image
    union all
    select branch, null as image, branch as commit, -1 from branch
    group by branch
    """

    dataset.create_view(
        'latest_commit',
        query,
    )
    return dataset
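
The maxwhen function registered above isn't shown. Given initial=(None, None) and finalize returning state[0], a plausible sketch (the exact aggregate signature splicer expects is assumed) tracks the value paired with the largest 'when' seen so far:

def maxwhen(state, value, when):
    # state is (best_value, best_when); keep the value with the max 'when'
    best_value, best_when = state
    if best_when is None or when > best_when:
        return (value, when)
    return state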
Example #11
    def test_index_scalar(self):
        field = Field(name="count", type="INTEGER")

        results = list(features.index_scalar(field, 2, ''))

        assert_sequence_equal(results, [('count', 2)])