Example #1
def test_jar_not_found():
    spark_solid = create_spark_solid("spark_solid", main_class="something")
    # guid guaranteed to not exist
    run_config = yaml.safe_load(CONFIG_FILE.format(path=str(uuid.uuid4())))

    result = execute_solid(spark_solid,
                           run_config=run_config,
                           raise_on_error=False,
                           mode_def=MODE_DEF)
    assert result.failure_data
    assert ("does not exist. A valid jar must be built before running this op."
            in result.failure_data.error.cause.message)
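CONFIG_FILE here is a module-level YAML template (defined outside this excerpt) into which a jar path is formatted; because the path is a freshly generated UUID, the jar cannot exist and the solid fails with the message asserted above. A rough, hypothetical illustration of what such a template might look like — the key names are assumptions inferred from the error messages in these tests, not copied from the original module:

# Hypothetical sketch only: the real CONFIG_FILE lives elsewhere in the test
# module and may use different or additional keys. 'spark_home' and
# 'application_jar' are inferred from the errors asserted in Examples #1 and #4.
CONFIG_FILE = """
solids:
  spark_solid:
    config:
      spark_home: /path/to/spark
      application_jar: "{path}"
"""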
Example #2
def test_jar_not_found():
    spark_solid = create_spark_solid('spark_solid', main_class='something')
    # guid guaranteed to not exist
    environment_dict = yaml.safe_load(
        CONFIG_FILE.format(path=str(uuid.uuid4())))

    result = execute_solid(spark_solid,
                           environment_dict=environment_dict,
                           raise_on_error=False,
                           mode_def=MODE_DEF)
    assert result.failure_data
    assert (
        'does not exist. A valid jar must be built before running this solid.'
        in result.failure_data.error.message)
Example #3
def event_ingest_pipeline():
    event_ingest = create_spark_solid(
        name='event_ingest',
        main_class='io.dagster.events.EventPipeline',
        description='Ingest events from JSON to Parquet',
    )

    @solid(input_defs=[InputDefinition('start', Nothing)], required_resource_keys={'snowflake'})
    def snowflake_load(context):
        # TODO: express dependency of this solid on event_ingest
        context.resources.snowflake.load_table_from_local_parquet(
            src='file:///tmp/dagster/events/data/output/2019/01/01/*.parquet', table='events'
        )

    snowflake_load(event_ingest(start=gunzipper(gzip_file=download_from_s3_to_file())))
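On snowflake_load, InputDefinition('start', Nothing) declares an ordering-only dependency: no value is passed, but the solid runs only after its upstream completes, and the final line wires event_ingest's output into that start input. To actually execute this composition it would typically be wrapped in a pipeline definition whose mode supplies the 'snowflake' resource named in required_resource_keys; a minimal sketch, assuming the legacy dagster API and the snowflake_resource from dagster_snowflake (the mode wiring below is an assumption, not part of the excerpt):

from dagster import ModeDefinition, pipeline
from dagster_snowflake import snowflake_resource

# Sketch only: provide the 'snowflake' resource that snowflake_load requires.
# Snowflake credentials (account, user, password, database, ...) would be
# supplied through run_config at execution time, and other solids in the
# composition (e.g. download_from_s3_to_file) may require additional resources.
@pipeline(mode_defs=[ModeDefinition(resource_defs={'snowflake': snowflake_resource})])
def event_ingest_pipeline():
    ...  # composition body as shown in Example #3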
Example #4
def test_no_spark_home():
    if "SPARK_HOME" in os.environ:
        del os.environ["SPARK_HOME"]

    spark_solid = create_spark_solid("spark_solid", main_class="something")
    run_config = yaml.safe_load(
        NO_SPARK_HOME_CONFIG_FILE.format(path=file_relative_path(__file__, "."))
    )

    result = execute_solid(
        spark_solid, run_config=run_config, raise_on_error=False, mode_def=MODE_DEF
    )
    assert result.failure_data
    assert (
        "No spark home set. You must either pass spark_home in config or set "
        "$SPARK_HOME in your environment (got None)." in result.failure_data.error.cause.message
    )
Example #5
def test_no_spark_home():
    if 'SPARK_HOME' in os.environ:
        del os.environ['SPARK_HOME']

    spark_solid = create_spark_solid('spark_solid', main_class='something')
    run_config = yaml.safe_load(
        NO_SPARK_HOME_CONFIG_FILE.format(
            path=file_relative_path(__file__, '.')))

    result = execute_solid(spark_solid,
                           run_config=run_config,
                           raise_on_error=False,
                           mode_def=MODE_DEF)
    assert result.failure_data
    assert (
        'No spark home set. You must either pass spark_home in config or set '
        '$SPARK_HOME in your environment (got None).'
        in result.failure_data.error.message)
Example #6
def pipe():
    for solid_name in ["first_pi", "second_pi", "third_pi"]:
        create_spark_solid(
            solid_name, main_class="org.apache.spark.examples.SparkPi")()
Example #7
def pipe():
    for solid_name in ['first_pi', 'second_pi', 'third_pi']:
        create_spark_solid(
            solid_name, main_class='org.apache.spark.examples.SparkPi')()
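Examples #6 and #7 read like the bodies of composition functions excerpted from @pipeline-decorated definitions; calling the result of create_spark_solid(...) inside the loop only registers the solids when the function is used as a pipeline composition. A minimal sketch of the surrounding definition, assuming the legacy dagster pipeline API (the decorator placement is an assumption, not shown in the excerpt):

from dagster import pipeline
from dagster_spark import create_spark_solid

# Sketch only: each call inside a @pipeline-decorated composition function adds
# an independent SparkPi solid to the pipeline. At execution time each solid
# still needs its Spark configuration (e.g. spark_home and an application jar,
# per the error messages asserted in the tests above) supplied via run_config.
@pipeline
def pipe():
    for solid_name in ["first_pi", "second_pi", "third_pi"]:
        create_spark_solid(solid_name, main_class="org.apache.spark.examples.SparkPi")()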