Example #1
from dagster import ExpectationResult, Materialization, Output


def materialization_and_expectation(_context):
    # Record two file materializations, one with a human-readable description.
    yield Materialization.file(path='/path/to/foo',
                               description='This is a table.')
    yield Materialization.file(path='/path/to/bar')
    # Attach data-quality results to the step's event log.
    yield ExpectationResult(success=True,
                            label='row_count',
                            description='passed')
    yield ExpectationResult(True)
    # Emit the actual output value of the solid.
    yield Output(True)
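For context, a generator like this runs as a Dagster solid, and every yielded event lands in the run's event log. A minimal wiring sketch, assuming the legacy (pre-1.0) dagster API; the solid body and pipeline name here are hypothetical stand-ins:

from dagster import Output, execute_pipeline, pipeline, solid


# Hypothetical wiring: a trivial solid standing in for the fuller body above.
@solid
def emits_events(_context):
    yield Output(True)


@pipeline
def events_pipeline():
    emits_events()


if __name__ == '__main__':
    execute_pipeline(events_pipeline)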
Example #2
import csv

from dagster import Materialization


def df_output_schema(_context, path, value):
    # Write a list of row dicts to CSV, using the first row's keys as the header.
    with open(path, 'w') as fd:
        writer = csv.DictWriter(fd, fieldnames=value[0].keys())
        writer.writeheader()
        writer.writerows(rowdicts=value)

    return Materialization.file(path)
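Called directly, the function only needs a destination path and a list of row dicts; the context argument is unused, so None suffices. A usage sketch with made-up rows:

# Hypothetical rows; the first dict's keys become the CSV header.
rows = [{'name': 'foo', 'count': 1}, {'name': 'bar', 'count': 2}]
materialization = df_output_schema(None, '/tmp/rows.csv', rows)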
Example #3
from dagster import Materialization, check


def spark_df_output_schema(_context, file_type, file_options, spark_df):
    # Only CSV output is supported here; anything else is a config error.
    if file_type == 'csv':
        spark_df.write.csv(file_options['path'],
                           header=file_options.get('header'),
                           sep=file_options.get('sep'))
        return Materialization.file(file_options['path'])
    else:
        check.failed('Unsupported file type: {}'.format(file_type))
Example #4
from dagster import Materialization, check


def spark_df_materializer(_context, config, spark_df):
    # The config dict holds exactly one entry: {writer_name: writer_options}.
    file_type, file_options = list(config.items())[0]

    if file_type == 'csv':
        spark_df.write.csv(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'parquet':
        spark_df.write.parquet(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'json':
        spark_df.write.json(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'jdbc':
        # JDBC targets are identified by connection URL rather than file path.
        spark_df.write.jdbc(**file_options)
        return Materialization.file(file_options['url'])
    elif file_type == 'orc':
        spark_df.write.orc(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'saveAsTable':
        # Managed tables are identified by table name rather than file path.
        spark_df.write.saveAsTable(**file_options)
        return Materialization.file(file_options['name'])
    elif file_type == 'text':
        spark_df.write.text(**file_options)
        return Materialization.file(file_options['path'])
    else:
        check.failed('Unsupported file type: {}'.format(file_type))
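The config passed in is expected to hold exactly one writer entry, whose options are splatted into the matching DataFrameWriter method. A usage sketch, assuming a local SparkSession and a hypothetical output path:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()
df = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'val'])

# The single config entry selects the writer; its options become keyword
# arguments, here spark_df.write.csv(path=..., header=True).
spark_df_materializer(None, {'csv': {'path': '/tmp/out_csv', 'header': True}}, df)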
Example #5
from dagster import Materialization, check
from pandas import DataFrame


def dataframe_output_schema(_context, file_type, file_options, pandas_df):
    check.str_param(file_type, 'file_type')
    check.dict_param(file_options, 'file_options')
    check.inst_param(pandas_df, 'pandas_df', DataFrame)

    if file_type == 'csv':
        path = file_options['path']
        # Forward every option except 'path' straight through to to_csv.
        pandas_df.to_csv(path, index=False, **dict_without_keys(file_options, 'path'))
    elif file_type == 'parquet':
        pandas_df.to_parquet(file_options['path'])
    elif file_type == 'table':
        # 'table' means tab-separated output.
        pandas_df.to_csv(file_options['path'], sep='\t', index=False)
    else:
        check.failed('Unsupported file_type {file_type}'.format(file_type=file_type))

    return Materialization.file(file_options['path'])
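dict_without_keys is not defined in this example (or the next one). Judging from how it is called, it behaves like the following hypothetical helper:

# Hypothetical reconstruction: copy the dict, dropping the named keys, so
# everything except 'path' can be forwarded as keyword arguments.
def dict_without_keys(ddict, *keys):
    return {key: value for key, value in ddict.items() if key not in set(keys)}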
Example #6
import pandas as pd

from dagster import Materialization, check


def dataframe_materializer(_context, config, pandas_df):
    check.inst_param(pandas_df, 'pandas_df', pd.DataFrame)
    # The config dict holds exactly one entry: {file_type: file_options}.
    file_type, file_options = list(config.items())[0]

    if file_type == 'csv':
        path = file_options['path']
        pandas_df.to_csv(path,
                         index=False,
                         **dict_without_keys(file_options, 'path'))
    elif file_type == 'parquet':
        pandas_df.to_parquet(file_options['path'])
    elif file_type == 'table':
        pandas_df.to_csv(file_options['path'], sep='\t', index=False)
    else:
        check.failed(
            'Unsupported file_type {file_type}'.format(file_type=file_type))

    return Materialization.file(file_options['path'])
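A quick usage sketch with a made-up frame; any csv option other than path (here a hypothetical sep) survives dict_without_keys and is forwarded to to_csv:

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
# Writes /tmp/df.csv with ';' as the delimiter and returns a Materialization.
dataframe_materializer(None, {'csv': {'path': '/tmp/df.csv', 'sep': ';'}}, df)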
Example #7
from dagster import Materialization, check


def spark_df_output_schema(_context, file_type, file_options, spark_df):
    if file_type == 'csv':
        spark_df.write.csv(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'parquet':
        spark_df.write.parquet(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'json':
        spark_df.write.json(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'jdbc':
        # JDBC targets are identified by connection URL rather than file path.
        spark_df.write.jdbc(**file_options)
        return Materialization.file(file_options['url'])
    elif file_type == 'orc':
        spark_df.write.orc(**file_options)
        return Materialization.file(file_options['path'])
    elif file_type == 'saveAsTable':
        # Managed tables are identified by table name rather than file path.
        spark_df.write.saveAsTable(**file_options)
        return Materialization.file(file_options['name'])
    elif file_type == 'text':
        spark_df.write.text(**file_options)
        return Materialization.file(file_options['path'])
    else:
        check.failed('Unsupported file type: {}'.format(file_type))
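Note which key each branch reports: jdbc materializes its connection url and saveAsTable its table name, while every path-based writer reports path. Hypothetical option dicts for the two non-path branches:

# Hypothetical options: these unpack into spark_df.write.jdbc(url=..., table=...)
# and spark_df.write.saveAsTable(name=...).
jdbc_options = {'url': 'jdbc:postgresql://localhost/db', 'table': 'events'}
save_as_table_options = {'name': 'warehouse.events'}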
Example #8
def materialize(self, _context, table_type, table_metadata, value):
    # Overwrite the table's Parquet data and report where it landed.
    path = self._path_for_table(table_type, table_metadata)
    value.write.parquet(path=path, mode='overwrite')
    return Materialization.file(path), None
Example #9
def emit_nothing(_context):
    # Yields a materialization event but never yields an Output.
    yield Materialization.file(path='/path/')
Example #10
def yield_stuff(_context):
    yield Materialization.file('/path/to/nowhere')
Example #11
def materialize(self, _context, table_type, _table_metadata, value):
    # Overwrite the table's CSV data (with a header row) and report the location.
    path = self._path_for_table(table_type)
    value.write.csv(path=path, header=True, mode='overwrite')
    return Materialization.file(path), None
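The materialize methods in Examples 8 and 11 live on some store class whose _path_for_table is not shown. A minimal hypothetical host for the one-argument variant in Example 11:

# Hypothetical host class: derives a deterministic output path per table type.
class LocalTableStore:
    def __init__(self, root):
        self.root = root

    def _path_for_table(self, table_type):
        return '{}/{}'.format(self.root, table_type)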
Example #12
from dagster import Materialization


def write_sauce(_context, path, sauce):
    # Persist the sauce's flavor string and report the file that was written.
    with open(path, 'w+') as fd:
        fd.write(sauce.flavor)
    return Materialization.file(path)
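Any value exposing a flavor string attribute will do; a usage sketch with a hypothetical stand-in class:

# Hypothetical stand-in for the sauce value.
class Sauce:
    def __init__(self, flavor):
        self.flavor = flavor


write_sauce(None, '/tmp/sauce.txt', Sauce('honey mustard'))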
Example #13
from dagster import Materialization


def test_out_of_pipeline_manager_yield_materialization():
    manager = Manager()
    # Outside a pipeline run, yield_event should return an event equal to
    # the one passed in.
    assert manager.yield_event(
        Materialization.file('/path/to/artifact', 'artifact')
    ) == Materialization.file('/path/to/artifact', 'artifact')
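Manager comes from the code under test and is not shown. What the assertion pins down is that, outside of a pipeline run, yield_event returns an event equal to the one passed in; a minimal stand-in consistent with that behavior (hypothetical):

# Hypothetical stand-in: out of a pipeline, events are simply echoed back.
class Manager:
    def yield_event(self, dagster_event):
        return dagster_event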