Exemple #1
0
  def test_row_as_table_row(self):
    """Round-trip a TableRow through TableRowJsonCoder.

    Checks that a coder built with a schema encodes the row to the expected
    JSON string, that decoding the encoded form yields an equal row, and that
    a coder created without a schema can decode the same payload.
    """
    # One entry per column: (field name, BigQuery type, cell value).
    columns = [
        ('s', 'STRING', 'abc'),
        ('i', 'INTEGER', 123),
        ('f', 'FLOAT', 123.456),
        ('b', 'BOOLEAN', True),
        ('r', 'RECORD', {'a': 'b'}),
    ]
    expected_json = (
        '{"s": "abc", "i": 123, "f": 123.456, "b": true, "r": {"a": "b"}}')

    table_fields = [
        bigquery.TableFieldSchema(name=name, type=field_type)
        for name, field_type, _ in columns]
    row_cells = [
        bigquery.TableCell(v=to_json_value(value)) for _, _, value in columns]

    schema_coder = TableRowJsonCoder(
        table_schema=bigquery.TableSchema(fields=table_fields))
    row = bigquery.TableRow(f=row_cells)

    self.assertEqual(expected_json, schema_coder.encode(row))
    self.assertEqual(row, schema_coder.decode(schema_coder.encode(row)))
    # Decoding does not need a schema, so a schema-less coder must also work.
    schemaless_coder = TableRowJsonCoder()
    self.assertEqual(
        row, schemaless_coder.decode(schema_coder.encode(row)))
Exemple #2
0
 def test_row_and_no_schema(self):
   """Encoding with a schema-less TableRowJsonCoder raises AttributeError.

   The error text is expected to start with
   'The TableRowJsonCoder requires'.
   """
   coder = TableRowJsonCoder()
   test_row = bigquery.TableRow(
       f=[bigquery.TableCell(v=to_json_value(e))
          for e in ['abc', 123, 123.456, True]])
   with self.assertRaises(AttributeError) as ctx:
     coder.encode(test_row)
   # Use str(exception): the ``.message`` attribute only exists on Python 2
   # exceptions, whereas str() yields the message on both Python 2 and 3.
   self.assertTrue(
       str(ctx.exception).startswith('The TableRowJsonCoder requires'))
Exemple #3
0
 def json_compliance_exception(self, value):
   """Assert that encoding ``value`` as a FLOAT cell raises ValueError.

   Builds a single-column FLOAT schema, wraps ``value`` in a one-cell
   TableRow and checks that encoding it raises a ValueError whose text
   contains ``bigquery.JSON_COMPLIANCE_ERROR``.

   Args:
     value: The cell value expected to be rejected by the coder.
   """
   # Setup that does not depend on ``value`` stays outside the assertRaises
   # block so only the code under test can satisfy the expected exception.
   schema_definition = [('f', 'FLOAT')]
   schema = bigquery.TableSchema(
       fields=[bigquery.TableFieldSchema(name=k, type=v)
               for k, v in schema_definition])
   coder = TableRowJsonCoder(table_schema=schema)
   with self.assertRaises(ValueError) as exn:
     test_row = bigquery.TableRow(
         f=[bigquery.TableCell(v=to_json_value(value))])
     coder.encode(test_row)
   # The message check must run after the with block: any statement placed
   # after the raising call inside the block is never executed. str() is used
   # because ``.message`` does not exist on Python 3 exceptions.
   self.assertIn(bigquery.JSON_COMPLIANCE_ERROR, str(exn.exception))
Exemple #4
0
 def decode(self, encoded_table_row):
   """Decode a JSON-encoded row into a bigquery.TableRow.

   Args:
     encoded_table_row: JSON text holding one object whose values become
       the cells of the row.

   Returns:
     A bigquery.TableRow with one TableCell per JSON value, in the order
     the keys appear in the JSON text.
   """
   # OrderedDict preserves the key order of the JSON text, so the cells come
   # out in the same order in which they were encoded.
   od = json.loads(
       encoded_table_row, object_pairs_hook=collections.OrderedDict)
   # values() works on both Python 2 and 3; itervalues() is Python 2 only.
   return bigquery.TableRow(
       f=[bigquery.TableCell(v=to_json_value(e)) for e in od.values()])
Exemple #5
0
    def get_test_rows(self):
        """Build BigQuery table rows together with their schema and dicts.

        The first row populates every schema field, including a nested
        record and a repeated record. The second row populates only the
        REQUIRED fields, uses ``None`` cells for the NULLABLE ones and an
        empty list for the repeated record. Cells are listed in the same
        order as the schema fields.

        Returns:
            A ``(table_rows, schema, expected_rows)`` tuple: the list of
            ``bigquery.TableRow`` objects, the ``bigquery.TableSchema`` and
            the list of dicts named as the expected values for those rows.
        """
        current_time = time.time()
        utc_moment = datetime.datetime.utcfromtimestamp(float(current_time))
        timestamp_text = utc_moment.strftime('%Y-%m-%d %H:%M:%S.%f UTC')

        def make_field(name, field_type, mode, **extra):
            # Thin wrapper so that each schema entry fits on one line.
            return bigquery.TableFieldSchema(
                name=name, type=field_type, mode=mode, **extra)

        def make_cell(value):
            # Wrap a Python value as a JSON-valued table cell.
            return bigquery.TableCell(v=to_json_value(value))

        populated_expected = {
            'i': 1,
            's': 'abc',
            'f': 2.3,
            'b': True,
            't': timestamp_text,
            'dt': '2016-10-31',
            'ts': '22:39:12.627498',
            'dt_ts': '2008-12-25T07:30:00',
            'r': {'s2': 'b'},
            'rpr': [{'s3': 'c', 'rpr2': [{'rs': ['d', 'e'], 's4': 'f'}]}],
        }
        sparse_expected = {
            'i': 10, 's': 'xyz', 'f': -3.14, 'b': False, 'rpr': []}
        expected_rows = [populated_expected, sparse_expected]

        record_fields = [make_field('s2', 'STRING', 'NULLABLE')]
        repeated_record_fields = [
            make_field('s3', 'STRING', 'NULLABLE'),
            make_field('rpr2', 'RECORD', 'REPEATED', fields=[
                make_field('rs', 'STRING', 'REPEATED'),
                make_field('s4', 'STRING', 'NULLABLE'),
            ]),
        ]

        schema = bigquery.TableSchema(fields=[
            make_field('b', 'BOOLEAN', 'REQUIRED'),
            make_field('f', 'FLOAT', 'REQUIRED'),
            make_field('i', 'INTEGER', 'REQUIRED'),
            make_field('s', 'STRING', 'REQUIRED'),
            make_field('t', 'TIMESTAMP', 'NULLABLE'),
            make_field('dt', 'DATE', 'NULLABLE'),
            make_field('ts', 'TIME', 'NULLABLE'),
            make_field('dt_ts', 'DATETIME', 'NULLABLE'),
            make_field('r', 'RECORD', 'NULLABLE', fields=record_fields),
            make_field(
                'rpr', 'RECORD', 'REPEATED', fields=repeated_record_fields),
        ])

        # For records we cannot use a plain dict because it doesn't create
        # nested schemas correctly, so the f,v based format is spelled out,
        # innermost structure first.
        rs_values = [{'v': 'd'}, {'v': 'e'}]
        rpr2_record = {'f': [{'v': rs_values}, {'v': 'f'}]}
        rpr_record = {'f': [{'v': 'c'}, {'v': [{'v': rpr2_record}]}]}

        populated_cells = [
            make_cell('true'),
            make_cell(str(2.3)),
            make_cell(str(1)),
            make_cell('abc'),
            # For timestamps str() cannot be used because it would truncate
            # the number representing the timestamp.
            make_cell('%f' % current_time),
            make_cell('2016-10-31'),
            make_cell('22:39:12.627498'),
            make_cell('2008-12-25T07:30:00'),
            make_cell({'f': [{'v': 'b'}]}),
            make_cell([{'v': rpr_record}]),
        ]

        sparse_cells = [
            make_cell('false'),
            make_cell(str(-3.14)),
            make_cell(str(10)),
            make_cell('xyz'),
        ]
        # One distinct empty cell per NULLABLE field, in schema order.
        sparse_cells.extend(
            bigquery.TableCell(v=None)
            for _ in ('t', 'dt', 'ts', 'dt_ts', 'r'))
        sparse_cells.append(make_cell([]))

        table_rows = [
            bigquery.TableRow(f=populated_cells),
            bigquery.TableRow(f=sparse_cells),
        ]
        return table_rows, schema, expected_rows