def materialization_and_expectation(_context):
    """Yield two file materializations, two expectation results and an output.

    Events are built one at a time, immediately before being yielded, in
    this order:

    1. ``Materialization.file`` for ``/path/to/foo`` with a description.
    2. ``Materialization.file`` for ``/path/to/bar``.
    3. A successful ``ExpectationResult`` labelled ``row_count``.
    4. An ``ExpectationResult`` built from the single positional ``True``.
    5. ``Output(True)``.

    Args:
        _context: Unused.
    """
    foo_file = Materialization.file(
        path='/path/to/foo', description='This is a table.'
    )
    yield foo_file

    bar_file = Materialization.file(path='/path/to/bar')
    yield bar_file

    row_count_check = ExpectationResult(
        success=True, label='row_count', description='passed'
    )
    yield row_count_check

    bare_check = ExpectationResult(True)
    yield bare_check

    result = Output(True)
    yield result
def df_output_schema(_context, path, value):
    """Write a sequence of row dicts to ``path`` as CSV and report the file.

    The CSV header is taken from the keys of the first row; every row in
    ``value`` is then written with ``csv.DictWriter``.

    Args:
        _context: Unused.
        path: Destination file path. The file is opened in ``'w'`` mode, so
            existing content is replaced.
        value: Indexable sequence of dicts. ``value[0]`` supplies the column
            names.

    Returns:
        The result of ``Materialization.file(path)``.

    Raises:
        IndexError: If ``value`` is an empty list, since there is no first
            row to take the header from.
    """
    # Resolve the header before opening the file so that a bad ``value``
    # fails without truncating whatever already exists at ``path``.
    fieldnames = list(value[0].keys())

    # newline='' hands line-ending control to the csv module, as its
    # documentation requires. Without it, platforms that translate '\n' on
    # write produce '\r\r\n' row endings.
    with open(path, 'w', newline='') as fd:
        writer = csv.DictWriter(fd, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rowdicts=value)

    return Materialization.file(path)
def spark_df_output_schema(_context, file_type, file_options, spark_df):
    """Write ``spark_df`` as CSV and report the written location.

    Only the ``'csv'`` file type is handled. Any other ``file_type`` is
    reported through ``check.failed``.

    Args:
        _context: Unused.
        file_type: Output format selector; compared against ``'csv'``.
        file_options: Mapping with a required ``'path'`` entry and optional
            ``'header'`` and ``'sep'`` entries (read with ``.get``).
        spark_df: Object whose ``write.csv`` method performs the write.

    Returns:
        ``Materialization.file(file_options['path'])`` when ``file_type`` is
        ``'csv'``.
    """
    if file_type == 'csv':
        write_csv = spark_df.write.csv
        write_csv(
            file_options['path'],
            header=file_options.get('header'),
            sep=file_options.get('sep'),
        )
        return Materialization.file(file_options['path'])

    check.failed('Unsupported file type: {}'.format(file_type))
def spark_df_materializer(_context, config, spark_df):
    """Write ``spark_df`` in the format named by ``config`` and report it.

    The first item of ``config`` supplies the file type (key) and the
    options (value). The options are passed as keyword arguments to the
    method of the same name on ``spark_df.write``.

    Args:
        _context: Unused.
        config: Mapping; only its first ``(file_type, file_options)`` item
            is used.
        spark_df: Object whose ``write`` attribute exposes one method per
            supported file type.

    Returns:
        ``Materialization.file`` built from the option that identifies the
        written location: ``'url'`` for ``jdbc``, ``'name'`` for
        ``saveAsTable`` and ``'path'`` for the other supported types.
        Unsupported types are reported through ``check.failed``.
    """
    file_type, file_options = list(config.items())[0]

    # Supported file type -> option key holding the written location. The
    # file type doubles as the name of the method on ``spark_df.write``.
    location_key_by_type = {
        'csv': 'path',
        'parquet': 'path',
        'json': 'path',
        'jdbc': 'url',
        'orc': 'path',
        'saveAsTable': 'name',
        'text': 'path',
    }

    if file_type in location_key_by_type:
        write = getattr(spark_df.write, file_type)
        write(**file_options)
        location_key = location_key_by_type[file_type]
        return Materialization.file(file_options[location_key])

    check.failed('Unsupported file type: {}'.format(file_type))
def dataframe_output_schema(_context, file_type, file_options, pandas_df):
    """Write ``pandas_df`` to ``file_options['path']`` and report the file.

    Supported ``file_type`` values:

    * ``'csv'``: ``to_csv`` without the index; every option other than
      ``'path'`` is forwarded as a keyword argument.
    * ``'parquet'``: ``to_parquet`` with the path only.
    * ``'table'``: ``to_csv`` with a tab separator and without the index.

    Any other value is reported through ``check.failed``.

    Args:
        _context: Unused.
        file_type: Output format selector (validated with
            ``check.str_param``).
        file_options: Options dict (validated with ``check.dict_param``);
            must contain ``'path'``.
        pandas_df: The ``DataFrame`` to write (validated with
            ``check.inst_param``).

    Returns:
        ``Materialization.file(file_options['path'])``.
    """
    check.str_param(file_type, 'file_type')
    check.dict_param(file_options, 'file_options')
    check.inst_param(pandas_df, 'pandas_df', DataFrame)

    if file_type in ('csv', 'table'):
        # Both text formats go through to_csv; they differ only in the
        # extra keyword arguments.
        destination = file_options['path']
        if file_type == 'csv':
            csv_kwargs = dict_without_keys(file_options, 'path')
        else:
            csv_kwargs = {'sep': '\t'}
        pandas_df.to_csv(destination, index=False, **csv_kwargs)
    elif file_type == 'parquet':
        pandas_df.to_parquet(file_options['path'])
    else:
        check.failed('Unsupported file_type {file_type}'.format(file_type=file_type))

    return Materialization.file(file_options['path'])
def dataframe_materializer(_context, config, pandas_df):
    """Write ``pandas_df`` in the format named by ``config`` and report it.

    The first item of ``config`` supplies the file type (key) and the
    options (value). ``'csv'`` forwards every option except ``'path'`` to
    ``to_csv``; ``'parquet'`` and ``'table'`` use only ``'path'``, with
    ``'table'`` writing tab-separated text. Other file types are reported
    through ``check.failed``.

    Args:
        _context: Unused.
        config: Mapping; only its first ``(file_type, file_options)`` item
            is used.
        pandas_df: The ``pd.DataFrame`` to write (validated with
            ``check.inst_param``).

    Returns:
        ``Materialization.file(file_options['path'])``.
    """
    check.inst_param(pandas_df, 'pandas_df', pd.DataFrame)

    first_entry = list(config.items())[0]
    file_type, file_options = first_entry

    if file_type == 'parquet':
        pandas_df.to_parquet(file_options['path'])
    elif file_type == 'table':
        pandas_df.to_csv(file_options['path'], sep='\t', index=False)
    elif file_type == 'csv':
        destination = file_options['path']
        extra_csv_options = dict_without_keys(file_options, 'path')
        pandas_df.to_csv(destination, index=False, **extra_csv_options)
    else:
        unsupported = 'Unsupported file_type {file_type}'.format(file_type=file_type)
        check.failed(unsupported)

    return Materialization.file(file_options['path'])
def spark_df_output_schema(_context, file_type, file_options, spark_df):
    """Write ``spark_df`` in the requested format and report the location.

    ``file_options`` is passed as keyword arguments to the method on
    ``spark_df.write`` whose name equals ``file_type``.

    Args:
        _context: Unused.
        file_type: One of ``'csv'``, ``'parquet'``, ``'json'``, ``'jdbc'``,
            ``'orc'``, ``'saveAsTable'`` or ``'text'``. Anything else is
            reported through ``check.failed``.
        file_options: Keyword arguments for the writer method. Must contain
            the entry used to report the location: ``'url'`` for ``jdbc``,
            ``'name'`` for ``saveAsTable``, ``'path'`` otherwise.
        spark_df: Object whose ``write`` attribute exposes the writer
            methods.

    Returns:
        ``Materialization.file`` built from the location entry of
        ``file_options`` for supported file types.
    """
    # Pick the option key that identifies where the data ends up.
    location_key = None
    if file_type in ('csv', 'parquet', 'json', 'orc', 'text'):
        location_key = 'path'
    elif file_type == 'jdbc':
        location_key = 'url'
    elif file_type == 'saveAsTable':
        location_key = 'name'

    if location_key is None:
        check.failed('Unsupported file type: {}'.format(file_type))
    else:
        write = getattr(spark_df.write, file_type)
        write(**file_options)
        return Materialization.file(file_options[location_key])
def materialize(self, _context, table_type, table_metadata, value):
    """Write ``value`` as parquet at the table's path and report the file.

    Args:
        _context: Unused.
        table_type: Passed to ``self._path_for_table`` to resolve the path.
        table_metadata: Passed to ``self._path_for_table`` alongside
            ``table_type``.
        value: Object whose ``write.parquet`` method performs the write; it
            is called with ``mode='overwrite'``.

    Returns:
        A 2-tuple ``(Materialization.file(path), None)``.
    """
    destination = self._path_for_table(table_type, table_metadata)
    value.write.parquet(path=destination, mode='overwrite')
    written_file = Materialization.file(destination)
    return written_file, None
def emit_nothing(_context):
    """Yield one file materialization for ``'/path/'`` and nothing else.

    Args:
        _context: Unused.
    """
    only_event = Materialization.file(path='/path/')
    yield only_event
def yield_stuff(_context):
    """Yield one file materialization for ``'/path/to/nowhere'``.

    Args:
        _context: Unused.
    """
    nowhere = Materialization.file('/path/to/nowhere')
    yield nowhere
def materialize(self, _context, table_type, _table_metadata, value):
    """Write ``value`` as CSV at the table's path and report the file.

    Args:
        _context: Unused.
        table_type: Passed to ``self._path_for_table`` to resolve the path.
        _table_metadata: Unused.
        value: Object whose ``write.csv`` method performs the write; it is
            called with ``header=True`` and ``mode='overwrite'``.

    Returns:
        A 2-tuple ``(Materialization.file(path), None)``.
    """
    destination = self._path_for_table(table_type)
    value.write.csv(path=destination, header=True, mode='overwrite')
    written_file = Materialization.file(destination)
    return written_file, None
def write_sauce(_context, path, sauce):
    """Write ``sauce.flavor`` to ``path`` and report the written file.

    Args:
        _context: Unused.
        path: Destination file path. Opened in ``'w+'`` mode, so existing
            content is replaced.
        sauce: Object whose ``flavor`` attribute is the text to write.

    Returns:
        The result of ``Materialization.file(path)``.
    """
    with open(path, 'w+') as sauce_file:
        sauce_file.write(sauce.flavor)

    written_file = Materialization.file(path)
    return written_file
def test_out_of_pipeline_manager_yield_materialization():
    """``Manager.yield_event`` returns an event equal to the one passed in.

    A fresh ``Manager`` is given a file materialization and the return
    value is compared with a separately built, identical materialization.
    """
    manager = Manager()

    returned = manager.yield_event(
        Materialization.file('/path/to/artifact', 'artifact')
    )
    expected = Materialization.file('/path/to/artifact', 'artifact')

    assert returned == expected