예제 #1
0
  def test_row_as_table_row(self):
    schema_definition = [
        ('s', 'STRING'),
        ('i', 'INTEGER'),
        ('f', 'FLOAT'),
        ('b', 'BOOLEAN'),
        ('r', 'RECORD')]
    data_defination = [
        'abc',
        123,
        123.456,
        True,
        {'a': 'b'}]
    str_def = '{"s": "abc", "i": 123, "f": 123.456, "b": true, "r": {"a": "b"}}'
    schema = bigquery.TableSchema(
        fields=[bigquery.TableFieldSchema(name=k, type=v)
                for k, v in schema_definition])
    coder = TableRowJsonCoder(table_schema=schema)
    test_row = bigquery.TableRow(
        f=[bigquery.TableCell(v=to_json_value(e)) for e in data_defination])

    self.assertEqual(str_def, coder.encode(test_row))
    self.assertEqual(test_row, coder.decode(coder.encode(test_row)))
    # A coder without schema can still decode.
    self.assertEqual(
        test_row, TableRowJsonCoder().decode(coder.encode(test_row)))
예제 #2
0
 def test_row_and_no_schema(self):
   coder = TableRowJsonCoder()
   test_row = bigquery.TableRow(
       f=[bigquery.TableCell(v=to_json_value(e))
          for e in ['abc', 123, 123.456, True]])
   with self.assertRaises(AttributeError) as ctx:
     coder.encode(test_row)
   self.assertTrue(
       ctx.exception.message.startswith('The TableRowJsonCoder requires'))
예제 #3
0
 def json_compliance_exception(self, value):
   with self.assertRaises(ValueError) as exn:
     schema_definition = [('f', 'FLOAT')]
     schema = bigquery.TableSchema(
         fields=[bigquery.TableFieldSchema(name=k, type=v)
                 for k, v in schema_definition])
     coder = TableRowJsonCoder(table_schema=schema)
     test_row = bigquery.TableRow(
         f=[bigquery.TableCell(v=to_json_value(value))])
     coder.encode(test_row)
     self.assertTrue(bigquery.JSON_COMPLIANCE_ERROR in exn.exception.message)
예제 #4
0
파일: bigquery.py 프로젝트: jyucoeng/beam
 def decode(self, encoded_table_row):
   od = json.loads(
       encoded_table_row, object_pairs_hook=collections.OrderedDict)
   return bigquery.TableRow(
       f=[bigquery.TableCell(v=to_json_value(e)) for e in od.itervalues()])
예제 #5
0
  def get_test_rows(self):
    now = time.time()
    dt = datetime.datetime.utcfromtimestamp(float(now))
    ts = dt.strftime('%Y-%m-%d %H:%M:%S.%f UTC')
    expected_rows = [
        {
            'i': 1,
            's': 'abc',
            'f': 2.3,
            'b': True,
            't': ts,
            'dt': '2016-10-31',
            'ts': '22:39:12.627498',
            'dt_ts': '2008-12-25T07:30:00',
            'r': {'s2': 'b'},
            'rpr': [{'s3': 'c', 'rpr2': [{'rs': ['d', 'e'], 's4': None}]}]
        },
        {
            'i': 10,
            's': 'xyz',
            'f': -3.14,
            'b': False,
            'rpr': [],
            't': None,
            'dt': None,
            'ts': None,
            'dt_ts': None,
            'r': None,
        }]

    nested_schema = [
        bigquery.TableFieldSchema(
            name='s2', type='STRING', mode='NULLABLE')]
    nested_schema_2 = [
        bigquery.TableFieldSchema(
            name='s3', type='STRING', mode='NULLABLE'),
        bigquery.TableFieldSchema(
            name='rpr2', type='RECORD', mode='REPEATED', fields=[
                bigquery.TableFieldSchema(
                    name='rs', type='STRING', mode='REPEATED'),
                bigquery.TableFieldSchema(
                    name='s4', type='STRING', mode='NULLABLE')])]

    schema = bigquery.TableSchema(
        fields=[
            bigquery.TableFieldSchema(
                name='b', type='BOOLEAN', mode='REQUIRED'),
            bigquery.TableFieldSchema(
                name='f', type='FLOAT', mode='REQUIRED'),
            bigquery.TableFieldSchema(
                name='i', type='INTEGER', mode='REQUIRED'),
            bigquery.TableFieldSchema(
                name='s', type='STRING', mode='REQUIRED'),
            bigquery.TableFieldSchema(
                name='t', type='TIMESTAMP', mode='NULLABLE'),
            bigquery.TableFieldSchema(
                name='dt', type='DATE', mode='NULLABLE'),
            bigquery.TableFieldSchema(
                name='ts', type='TIME', mode='NULLABLE'),
            bigquery.TableFieldSchema(
                name='dt_ts', type='DATETIME', mode='NULLABLE'),
            bigquery.TableFieldSchema(
                name='r', type='RECORD', mode='NULLABLE',
                fields=nested_schema),
            bigquery.TableFieldSchema(
                name='rpr', type='RECORD', mode='REPEATED',
                fields=nested_schema_2)])

    table_rows = [
        bigquery.TableRow(f=[
            bigquery.TableCell(v=to_json_value('true')),
            bigquery.TableCell(v=to_json_value(str(2.3))),
            bigquery.TableCell(v=to_json_value(str(1))),
            bigquery.TableCell(v=to_json_value('abc')),
            # For timestamps cannot use str() because it will truncate the
            # number representing the timestamp.
            bigquery.TableCell(v=to_json_value('%f' % now)),
            bigquery.TableCell(v=to_json_value('2016-10-31')),
            bigquery.TableCell(v=to_json_value('22:39:12.627498')),
            bigquery.TableCell(v=to_json_value('2008-12-25T07:30:00')),
            # For record we cannot use dict because it doesn't create nested
            # schemas correctly so we have to use this f,v based format
            bigquery.TableCell(v=to_json_value({'f': [{'v': 'b'}]})),
            bigquery.TableCell(v=to_json_value([{'v':{'f':[{'v': 'c'}, {'v':[
                {'v':{'f':[{'v':[{'v':'d'}, {'v':'e'}]}, {'v':None}]}}]}]}}]))
            ]),
        bigquery.TableRow(f=[
            bigquery.TableCell(v=to_json_value('false')),
            bigquery.TableCell(v=to_json_value(str(-3.14))),
            bigquery.TableCell(v=to_json_value(str(10))),
            bigquery.TableCell(v=to_json_value('xyz')),
            bigquery.TableCell(v=None),
            bigquery.TableCell(v=None),
            bigquery.TableCell(v=None),
            bigquery.TableCell(v=None),
            bigquery.TableCell(v=None),
            bigquery.TableCell(v=to_json_value([]))])]
    return table_rows, schema, expected_rows