Exemple #1
0
 def test_row_and_no_schema(self):
   """Encoding with a coder built without arguments raises AttributeError.

   The error message must start with 'The TableRowJsonCoder requires'.
   """
   schemaless_coder = TableRowJsonCoder()
   raw_values = ['abc', 123, 123.456, True]
   cells = []
   for raw_value in raw_values:
     cells.append(bigquery.TableCell(v=to_json_value(raw_value)))
   row = bigquery.TableRow(f=cells)
   expected_prefix = r'^The TableRowJsonCoder requires'
   with self.assertRaisesRegexp(AttributeError, expected_prefix):
     schemaless_coder.encode(row)
Exemple #2
0
 def json_compliance_exception(self, value):
   """Assert that encoding `value` in a single FLOAT column raises ValueError.

   The raised message must contain JSON_COMPLIANCE_ERROR (matched literally).
   The whole setup runs inside the assertion context, so a matching ValueError
   from any of these steps satisfies the check.

   Args:
     value: the Python value placed in the row's only cell.
   """
   expected_pattern = re.escape(JSON_COMPLIANCE_ERROR)
   with self.assertRaisesRegexp(ValueError, expected_pattern):
     float_field = bigquery.TableFieldSchema(name='f', type='FLOAT')
     float_schema = bigquery.TableSchema(fields=[float_field])
     coder = TableRowJsonCoder(table_schema=float_schema)
     cell = bigquery.TableCell(v=to_json_value(value))
     row = bigquery.TableRow(f=[cell])
     coder.encode(row)
Exemple #3
0
 def test_row_and_no_schema(self):
     """Encoding with a coder built without arguments raises AttributeError.

     The exception text must start with 'The TableRowJsonCoder requires'.
     """
     coder = TableRowJsonCoder()
     test_row = bigquery.TableRow(f=[
         bigquery.TableCell(v=to_json_value(e))
         for e in ['abc', 123, 123.456, True]
     ])
     with self.assertRaises(AttributeError) as ctx:
         coder.encode(test_row)
     # Use str() instead of `.message`: exceptions have no `message`
     # attribute on Python 3, so reading it would itself raise
     # AttributeError and the prefix would never be checked. str() yields
     # the message text on both Python 2 and Python 3.
     self.assertTrue(
         str(ctx.exception).startswith('The TableRowJsonCoder requires'))
  def test_row_as_table_row(self):
    """Round-trip a seven-column row through TableRowJsonCoder.

    Checks the exact JSON text produced by encode(), that decode() turns it
    back into an equal TableRow, and that a coder constructed without a
    schema can decode the same text.
    """
    column_types = [('s', 'STRING'), ('i', 'INTEGER'), ('f', 'FLOAT'),
                    ('b', 'BOOLEAN'), ('n', 'NUMERIC'), ('r', 'RECORD'),
                    ('g', 'GEOGRAPHY')]
    column_values = [
        'abc',
        123,
        123.456,
        True,
        decimal.Decimal('987654321.987654321'),
        {'a': 'b'},
        'LINESTRING(1 2, 3 4, 5 6, 7 8)',
    ]
    expected_json = (
        '{"s": "abc", "i": 123, "f": 123.456, "b": true, '
        '"n": "987654321.987654321", "r": {"a": "b"}, '
        '"g": "LINESTRING(1 2, 3 4, 5 6, 7 8)"}')

    fields = []
    for column_name, column_type in column_types:
      fields.append(
          bigquery.TableFieldSchema(name=column_name, type=column_type))
    coder = TableRowJsonCoder(table_schema=bigquery.TableSchema(fields=fields))

    def as_cell(value):
      # Decimal values are converted to their string form before being
      # wrapped; every other value is wrapped as-is.
      if isinstance(value, decimal.Decimal):
        value = str(value)
      return bigquery.TableCell(v=to_json_value(value))

    row = bigquery.TableRow(f=[as_cell(value) for value in column_values])

    self.assertEqual(expected_json, coder.encode(row))
    self.assertEqual(row, coder.decode(coder.encode(row)))
    # A coder without schema can still decode.
    schemaless_coder = TableRowJsonCoder()
    self.assertEqual(row, schemaless_coder.decode(coder.encode(row)))
Exemple #5
0
    def test_row_as_table_row(self):
        """Round-trip a five-column row through TableRowJsonCoder.

        Checks the exact JSON text produced by encode(), that decode() turns
        it back into an equal TableRow, and that a coder constructed without
        a schema can decode the same text.
        """
        column_types = [('s', 'STRING'), ('i', 'INTEGER'), ('f', 'FLOAT'),
                        ('b', 'BOOLEAN'), ('r', 'RECORD')]
        column_values = ['abc', 123, 123.456, True, {'a': 'b'}]
        expected_json = (
            '{"s": "abc", "i": 123, "f": 123.456, "b": true, "r": {"a": "b"}}')

        fields = []
        for column_name, column_type in column_types:
            fields.append(
                bigquery.TableFieldSchema(name=column_name, type=column_type))
        coder = TableRowJsonCoder(
            table_schema=bigquery.TableSchema(fields=fields))

        cells = []
        for value in column_values:
            cells.append(bigquery.TableCell(v=to_json_value(value)))
        row = bigquery.TableRow(f=cells)

        self.assertEqual(expected_json, coder.encode(row))
        self.assertEqual(row, coder.decode(coder.encode(row)))
        # A coder without schema can still decode.
        schemaless_coder = TableRowJsonCoder()
        self.assertEqual(row, schemaless_coder.decode(coder.encode(row)))
Exemple #6
0
 def decode(self, encoded_table_row):
     """Turn a JSON-encoded row into a bigquery.TableRow.

     The JSON text is parsed into an OrderedDict so the key order of the
     input is kept; each value, in that order, is wrapped in a TableCell.
     The keys themselves are not used.

     Args:
       encoded_table_row: JSON text accepted by json.loads.

     Returns:
       A bigquery.TableRow whose `f` list holds one cell per parsed value.
     """
     parsed_fields = json.loads(
         encoded_table_row, object_pairs_hook=collections.OrderedDict)
     cells = []
     for field_value in itervalues(parsed_fields):
         cells.append(bigquery.TableCell(v=to_json_value(field_value)))
     return bigquery.TableRow(f=cells)
    def get_test_rows(self):
        """Build fixture rows, their schema, and the matching expected dicts.

        Two rows are produced: one with every column populated (including a
        nested record and a repeated record) and one where every column after
        the first four is None.

        Returns:
          A (table_rows, schema, expected_rows) tuple: a list of two
          bigquery.TableRow objects, the bigquery.TableSchema describing
          their ten columns, and a list of two plain dicts keyed by column
          name.
        """
        now = time.time()
        timestamp_text = datetime.datetime.utcfromtimestamp(
            float(now)).strftime('%Y-%m-%d %H:%M:%S.%f UTC')

        populated_expected = {
            'i': 1,
            's': 'abc',
            'f': 2.3,
            'b': True,
            't': timestamp_text,
            'dt': '2016-10-31',
            'ts': '22:39:12.627498',
            'dt_ts': '2008-12-25T07:30:00',
            'r': {'s2': 'b'},
            'rpr': [{'s3': 'c', 'rpr2': [{'rs': ['d', 'e'], 's4': None}]}],
        }
        sparse_expected = {
            'i': 10,
            's': 'xyz',
            'f': -3.14,
            'b': False,
            'rpr': [],
            't': None,
            'dt': None,
            'ts': None,
            'dt_ts': None,
            'r': None,
        }
        expected_rows = [populated_expected, sparse_expected]

        # Local alias to keep the schema declaration readable.
        make_field = bigquery.TableFieldSchema

        nested_schema = [make_field(name='s2', type='STRING', mode='NULLABLE')]
        nested_schema_2 = [
            make_field(name='s3', type='STRING', mode='NULLABLE'),
            make_field(
                name='rpr2',
                type='RECORD',
                mode='REPEATED',
                fields=[
                    make_field(name='rs', type='STRING', mode='REPEATED'),
                    make_field(name='s4', type='STRING', mode='NULLABLE'),
                ]),
        ]

        # Column order here is the order of the cells in each table row below.
        schema = bigquery.TableSchema(fields=[
            make_field(name='b', type='BOOLEAN', mode='REQUIRED'),
            make_field(name='f', type='FLOAT', mode='REQUIRED'),
            make_field(name='i', type='INTEGER', mode='REQUIRED'),
            make_field(name='s', type='STRING', mode='REQUIRED'),
            make_field(name='t', type='TIMESTAMP', mode='NULLABLE'),
            make_field(name='dt', type='DATE', mode='NULLABLE'),
            make_field(name='ts', type='TIME', mode='NULLABLE'),
            make_field(name='dt_ts', type='DATETIME', mode='NULLABLE'),
            make_field(
                name='r', type='RECORD', mode='NULLABLE',
                fields=nested_schema),
            make_field(
                name='rpr', type='RECORD', mode='REPEATED',
                fields=nested_schema_2),
        ])

        def json_cell(value):
            # Wrap a Python value as a TableCell holding its JSON value.
            return bigquery.TableCell(v=to_json_value(value))

        # For record we cannot use dict because it doesn't create nested
        # schemas correctly so we have to use this f,v based format
        record_value = {'f': [{'v': 'b'}]}
        repeated_record_value = [{
            'v': {
                'f': [
                    {'v': 'c'},
                    {
                        'v': [{
                            'v': {
                                'f': [
                                    {'v': [{'v': 'd'}, {'v': 'e'}]},
                                    {'v': None},
                                ]
                            }
                        }]
                    },
                ]
            }
        }]

        populated_row = bigquery.TableRow(f=[
            json_cell('true'),
            json_cell(str(2.3)),
            json_cell(str(1)),
            json_cell('abc'),
            # For timestamps cannot use str() because it will truncate the
            # number representing the timestamp.
            json_cell('%f' % now),
            json_cell('2016-10-31'),
            json_cell('22:39:12.627498'),
            json_cell('2008-12-25T07:30:00'),
            json_cell(record_value),
            json_cell(repeated_record_value),
        ])

        sparse_cells = [
            json_cell('false'),
            json_cell(str(-3.14)),
            json_cell(str(10)),
            json_cell('xyz'),
        ]
        # The remaining six columns carry no value; the last of them is the
        # REPEATED field without any values. These cells are built with
        # v=None directly rather than through to_json_value.
        sparse_cells.extend(bigquery.TableCell(v=None) for _ in range(6))
        sparse_row = bigquery.TableRow(f=sparse_cells)

        table_rows = [populated_row, sparse_row]
        return table_rows, schema, expected_rows