Ejemplo n.º 1
0
  def test_unsupported_schema(self):
    """Test that unsupported schema column types produce a fatal error.

    Note that not all valid BQ schema values are valid/used in the structured
    data package. Each type below is placed in a one-column schema and
    analyzed twice, once with cloud=False and once with cloud=True; both jobs
    must report the unsupported type in their fatal error message.
    """
    rejected_types = ('bytes', 'boolean', 'timestamp', 'date', 'time',
                      'datetime', 'record')
    for rejected in rejected_types:
      one_col_schema = 'col_name:%s' % rejected
      expected_msg = 'Schema contains an unsupported type %s.' % rejected

      # Run with cloud=False on a plain file path.
      local_job = core_sd.analyze_async(
          'some_dir',
          dlml.CsvDataSet(file_pattern=['file1.txt'], schema=one_col_schema),
          cloud=False).wait()
      self.assertIn(expected_msg, local_job.fatal_error.message)

      # Run with cloud=True on gs:// paths and a placeholder project id.
      cloud_job = core_sd.analyze_async(
          'gs://some_dir',
          dlml.CsvDataSet(file_pattern=['gs://file1.txt'],
                          schema=one_col_schema),
          cloud=True,
          project_id='junk_project_id').wait()
      self.assertIn(expected_msg, cloud_job.fatal_error.message)
Ejemplo n.º 2
0
 def test_cloud_but_local_files(self):
   """Test cloud=True with a file pattern that is not a gcs path."""
   expected_msg = 'File file1.txt is not a gcs path'
   pending = core_sd.analyze_async(
       'gs://some_dir',
       dlml.CsvDataSet(file_pattern=['file1.txt'],
                       schema='col1:STRING,col2:INTEGER,col3:FLOAT'),
       project_id='project_id',
       cloud=True)
   job = pending.wait()
   self.assertIn(expected_msg, job.fatal_error.message)
Ejemplo n.º 3
0
 def test_csvdataset_one_file(self):
   """Test that a CsvDataSet with two file patterns gives a fatal error."""
   # TODO(brandondutra) remove this restriction
   expected_msg = 'should be built with a file pattern'
   pending = core_sd.analyze_async(
       'some_dir',
       dlml.CsvDataSet(file_pattern=['file1.txt', 'file2.txt'],
                       schema='col1:STRING,col2:INTEGER,col3:FLOAT'))
   job = pending.wait()
   self.assertIn(expected_msg, job.fatal_error.message)
Ejemplo n.º 4
0
 def test_projectid(self):
   """Test that passing project_id without cloud=True gives a fatal error."""
   expected_msg = 'project_id only needed if cloud is True'
   pending = core_sd.analyze_async(
       'some_dir',
       dlml.CsvDataSet(file_pattern=['file1.txt'],
                       schema='col1:STRING,col2:INTEGER,col3:FLOAT'),
       project_id='project_id')
   job = pending.wait()
   self.assertIn(expected_msg, job.fatal_error.message)
Ejemplo n.º 5
0
 def test_not_csvdataset(self):
   """Test that a dataset argument that is not a CsvDataSet is rejected."""
   # A plain string stands in for the dataset; it is not a CsvDataSet.
   not_a_dataset = 'some_file.txt'
   expected_msg = 'Only CsvDataSet is supported'
   pending = core_sd.analyze_async('some_dir', not_a_dataset)
   job = pending.wait()
   self.assertIn(expected_msg, job.fatal_error.message)