def test_dict_roundtrip():
    handle = SolidHandle('baz', SolidHandle('bar', SolidHandle('foo', None)))
    assert SolidHandle.from_dict(json.loads(json.dumps(
        handle._asdict()))) == handle

    handle = SolidHandle('foo', None)
    assert SolidHandle.from_dict(json.loads(json.dumps(
        handle._asdict()))) == handle
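
The roundtrip works because _asdict() only converts the top level: json.dumps renders the nested handle, which is still a tuple, as a JSON array, so from_dict has to rebuild parents from that list form. A minimal self-contained sketch of the pattern, using a hypothetical Handle class rather than Dagster's actual SolidHandle:

import json
from typing import NamedTuple, Optional


class Handle(NamedTuple):
    name: str
    parent: Optional["Handle"]

    @staticmethod
    def from_dict(d):
        parent = d["parent"]
        # json.dumps serialized the nested NamedTuple as a two-element list,
        # so rebuild the parent chain recursively from that form.
        if isinstance(parent, list):
            parent = Handle.from_dict({"name": parent[0], "parent": parent[1]})
        return Handle(d["name"], parent)


handle = Handle("baz", Handle("bar", Handle("foo", None)))
assert Handle.from_dict(json.loads(json.dumps(handle._asdict()))) == handle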
Example #2
def test_dict_roundtrip():
    handle = NodeHandle("baz", NodeHandle("bar", NodeHandle("foo", None)))
    assert NodeHandle.from_dict(json.loads(json.dumps(
        handle._asdict()))) == handle

    handle = NodeHandle("foo", None)
    assert NodeHandle.from_dict(json.loads(json.dumps(
        handle._asdict()))) == handle
Example #3
    def launch_run(self, instance, run, external_pipeline):
        '''Subclasses must implement this method.'''

        check.inst_param(run, 'run', PipelineRun)
        check.inst_param(external_pipeline, 'external_pipeline',
                         ExternalPipeline)

        # We use the IPC subprocess machinery here because we want to be able to interrupt this
        # process. We could use multiprocessing and a thread to poll a shared multiprocessing.Event,
        # interrupting when the event is set, but in this case that's six of one, half a dozen of
        # the other.
        process = open_ipc_subprocess([
            sys.executable,
            file_relative_path(__file__, 'sync_cli_api_execute_run.py'),
            json.dumps({
                'instance_ref':
                serialize_dagster_namedtuple(self._instance.get_ref()),
                'pipeline_origin':
                serialize_dagster_namedtuple(external_pipeline.get_origin()),
                'pipeline_run_id':
                run.run_id,
            }),
        ])

        with self._processes_lock:
            self._living_process_by_run_id[run.run_id] = process

        return run
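
The payload crosses the process boundary as a single JSON-encoded argv argument, so the child script presumably begins by decoding sys.argv[1]. A sketch of that receiving end (this is not the actual sync_cli_api_execute_run.py):

import json
import sys

# Keys mirror what the parent process serialized above.
payload = json.loads(sys.argv[1])
instance_ref = payload["instance_ref"]  # still a serialized Dagster namedtuple
pipeline_origin = payload["pipeline_origin"]
run_id = payload["pipeline_run_id"]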
Example #4
def test_graphql_get(instance, test_client: TestClient):
    # base case
    response = test_client.get(
        "/graphql",
        params={"query": "{__typename}"},
    )
    assert response.status_code == 200, response.text
    assert response.json() == {"data": {"__typename": "DagitQuery"}}

    # missing
    response = test_client.get("/graphql")
    assert response.status_code == 400, response.text

    # variables
    var_str = json.dumps({"runId": "missing"})
    response = test_client.get(
        "/graphql",
        params={"query": RUN_QUERY, "variables": var_str},
    )
    assert response.status_code == 200, response.text
    assert response.json() == {"data": {"pipelineRunOrError": {"__typename": "RunNotFoundError"}}}

    # malformed vars
    response = test_client.get(
        "/graphql",
        params={
            "query": RUN_QUERY,
            "variables": var_str[:-2],  # malform by trimming
        },
    )
    assert response.status_code == 400, response.text
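
On a GET request the GraphQL variables travel as a single JSON-encoded string inside the query parameters, which is why var_str is built with json.dumps and why trimming two characters off it produces the 400. An equivalent call with a plain HTTP client would look like this (the URL and query text are assumptions, not taken from the test suite):

import json
import requests

response = requests.get(
    "http://localhost:3000/graphql",  # assumed Dagit address
    params={
        "query": "query RunQuery($runId: ID!) { pipelineRunOrError(runId: $runId) { __typename } }",
        "variables": json.dumps({"runId": "missing"}),
    },
)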
Example #5
def test_asset_multi_wipe(asset_instance):
    runner = CliRunner()
    execute_pipeline(pipeline_one, instance=asset_instance)
    execute_pipeline(pipeline_two, instance=asset_instance)
    asset_keys = asset_instance.all_asset_keys()
    assert len(asset_keys) == 4

    result = runner.invoke(
        asset_wipe_command,
        [json.dumps(["path", "to", "asset_3"]),
         json.dumps(["asset_1"])],
        input="DELETE\n",
    )
    assert result.exit_code == 0
    assert "Removed asset indexes from event logs" in result.output
    asset_keys = asset_instance.all_asset_keys()
    assert len(asset_keys) == 2
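
Outside of CliRunner, the equivalent invocation passes each asset key to the CLI as a JSON-encoded list of path components, e.g. something like dagster asset wipe '["path", "to", "asset_3"]' '["asset_1"]' (the exact command name is an assumption based on asset_wipe_command).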
Example #6
        def send_wrapper(message: Message):
            if message["type"] == "http.response.start":
                counter = traced_counter.get()
                if counter and isinstance(counter, Counter):
                    headers = MutableHeaders(scope=message)
                    headers.append("x-dagster-call-counts", json.dumps(counter.counts()))

            return send(message)
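
However the header is attached, it decodes back to a plain dict on the receiving side (a sketch, assuming the counter middleware above is installed on the app behind test_client):

import json

response = test_client.get("/graphql", params={"query": "{__typename}"})
counts = json.loads(response.headers["x-dagster-call-counts"])  # e.g. {"runs_by_id": 2}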
Example #7
def test_asset_wipe_errors(asset_instance):  # pylint: disable=unused-argument
    runner = CliRunner()
    result = runner.invoke(asset_wipe_command)
    assert result.exit_code == 2
    assert (
        "Error, you must specify an asset key or use `--all` to wipe all asset keys."
        in result.output
    )

    result = runner.invoke(asset_wipe_command, ["--all", json.dumps(["path", "to", "asset_key"])])
    assert result.exit_code == 2
    assert "Error, cannot use more than one of: asset key, `--all`." in result.output
Example #8
def test_asset_single_wipe_noprompt(asset_instance):
    runner = CliRunner()
    execute_pipeline(pipeline_one, instance=asset_instance)
    execute_pipeline(pipeline_two, instance=asset_instance)
    asset_keys = asset_instance.all_asset_keys()
    assert len(asset_keys) == 4

    result = runner.invoke(
        asset_wipe_command,
        ["--noprompt", json.dumps(["path", "to", "asset_3"])])
    assert result.exit_code == 0
    assert "Removed asset indexes from event logs" in result.output

    asset_keys = asset_instance.all_asset_keys()
    assert len(asset_keys) == 3
Example #9
def test_graphql_post(test_client: TestClient):
    # base case
    response = test_client.post(
        "/graphql",
        params={"query": "{__typename}"},
    )
    assert response.status_code == 200, response.text
    assert response.json() == {"data": {"__typename": "DagitQuery"}}

    # missing
    response = test_client.post("/graphql")
    assert response.status_code == 400, response.text

    # variables
    var_str = json.dumps({"runId": "missing"})
    response = test_client.post(
        "/graphql",
        params={"query": RUN_QUERY, "variables": var_str},
    )
    assert response.status_code == 200, response.text
    assert response.json() == {"data": {"pipelineRunOrError": {"__typename": "RunNotFoundError"}}}

    # malformed vars
    response = test_client.post(
        "/graphql",
        params={
            "query": RUN_QUERY,
            "variables": var_str[:-2],  # malform by trimming
        },
    )
    assert response.status_code == 400, response.text

    # application/json
    response = test_client.post(
        "/graphql",
        json={"query": RUN_QUERY, "variables": {"runId": "missing"}},
    )
    assert response.status_code == 200, response.text
    assert response.json() == {"data": {"pipelineRunOrError": {"__typename": "RunNotFoundError"}}}

    # application/graphql
    response = test_client.post(
        "/graphql",
        data="{__typename}",
        headers={"Content-type": "application/graphql"},
    )
    assert response.status_code == 200, response.text
    assert response.json() == {"data": {"__typename": "DagitQuery"}}
Example #10
def dataproc_solid(context):
    job_config = context.solid_config['job_config']

    context.log.info('submitting job with config: %s' % str(json.dumps(job_config)))

    if context.solid_config['job_scoped_cluster']:
        # Cluster context manager, creates and then deletes cluster
        with context.resources.dataproc.cluster_context_manager() as cluster:
            # Submit the job specified by this solid to the cluster defined by the associated resource
            result = cluster.submit_job(job_config)

            job_id = result['reference']['jobId']
            context.log.info('Submitted job ID {}'.format(job_id))
            cluster.wait_for_job(job_id)
    else:
        # Submit to an existing cluster
        # Submit the job specified by this solid to the cluster defined by the associated resource
        result = context.resources.dataproc.submit_job(job_config)

        job_id = result['reference']['jobId']
        context.log.info('Submitted job ID {}'.format(job_id))
        context.resources.dataproc.wait_for_job(job_id)
Example #11
    def construct_job(self, run):
        check.inst_param(run, 'run', PipelineRun)

        dagster_labels = {
            'app.kubernetes.io/name': 'dagster',
            'app.kubernetes.io/instance': 'dagster',
            'app.kubernetes.io/version': dagster_version,
        }

        execution_params = execution_params_from_pipeline_run(run)

        job_container = client.V1Container(
            name='dagster-job-%s' % run.run_id,
            image=self.job_image,
            command=['dagster-graphql'],
            args=[
                "-p",
                "startPipelineExecution",
                "-v",
                json.dumps(
                    {'executionParams': execution_params.to_graphql_input()}),
            ],
            image_pull_policy=self.image_pull_policy,
            env=[
                client.V1EnvVar(
                    name='DAGSTER_PG_PASSWORD',
                    value_from=client.V1EnvVarSource(
                        secret_key_ref=client.V1SecretKeySelector(
                            name='dagster-postgresql',
                            key='postgresql-password')),
                ),
            ],
            env_from=self.env_from_sources,
            volume_mounts=[
                client.V1VolumeMount(
                    name='dagster-instance',
                    mount_path='{dagster_home}/dagster.yaml'.format(
                        dagster_home=self.dagster_home),
                    sub_path='dagster.yaml',
                )
            ],
        )

        config_map_volume = client.V1Volume(
            name='dagster-instance',
            config_map=client.V1ConfigMapVolumeSource(
                name=self.instance_config_map),
        )

        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                name='dagster-job-pod-%s' % run.run_id,
                labels=dagster_labels,
            ),
            spec=client.V1PodSpec(
                image_pull_secrets=self.image_pull_secrets,
                service_account_name=self.service_account_name,
                restart_policy='Never',
                containers=[job_container],
                volumes=[config_map_volume],
            ),
        )

        job = client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=client.V1ObjectMeta(name='dagster-job-%s' % run.run_id,
                                         labels=dagster_labels),
            spec=client.V1JobSpec(
                template=template,
                backoff_limit=BACKOFF_LIMIT,
                ttl_seconds_after_finished=TTL_SECONDS_AFTER_FINISHED,
            ),
        )
        return job
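
The V1Job returned here still has to be created against the cluster; with the official kubernetes Python client that is a single call to the batch API (the namespace is an assumption):

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() when running in a pod
batch_api = client.BatchV1Api()
batch_api.create_namespaced_job(namespace="default", body=job)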
Example #12
    def construct_job(self, run):
        check.inst_param(run, 'run', PipelineRun)

        dagster_labels = {
            'app.kubernetes.io/name': 'dagster',
            'app.kubernetes.io/instance': 'dagster',
            'app.kubernetes.io/version': dagster_version,
        }

        execution_params = {
            'executionParams': {
                'selector': run.selector.to_graphql_input(),
                "environmentConfigData": run.environment_dict,
                'executionMetadata': {
                    "runId": run.run_id
                },
                "mode": run.mode,
            },
        }

        job_container = client.V1Container(
            name='dagster-job-%s' % run.run_id,
            image=self.job_image,
            command=['dagster-graphql'],
            args=["-p", "executePlan", "-v",
                  json.dumps(execution_params)],
            image_pull_policy=self.image_pull_policy,
            env=[
                client.V1EnvVar(name='DAGSTER_HOME',
                                value='/opt/dagster/dagster_home')
            ],
            volume_mounts=[
                client.V1VolumeMount(
                    name='dagster-instance',
                    mount_path='/opt/dagster/dagster_home/dagster.yaml',
                    sub_path='dagster.yaml',
                )
            ],
        )

        config_map_volume = client.V1Volume(
            name='dagster-instance',
            config_map=client.V1ConfigMapVolumeSource(
                name=self.instance_config_map),
        )

        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(name='dagster-job-pod-%s' %
                                         run.run_id,
                                         labels=dagster_labels),
            spec=client.V1PodSpec(
                image_pull_secrets=self.image_pull_secrets,
                service_account_name=self.service_account_name,
                restart_policy='Never',
                containers=[job_container],
                volumes=[config_map_volume],
            ),
        )

        job = client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=client.V1ObjectMeta(name='dagster-job-%s' % run.run_id,
                                         labels=dagster_labels),
            spec=client.V1JobSpec(
                template=template,
                backoff_limit=BACKOFF_LIMIT,
                ttl_seconds_after_finished=TTL_SECONDS_AFTER_FINISHED,
            ),
        )
        return job
Example #13
def pretty_dump(data):
    return json.dumps(data, indent=2, separators=(",", ": "))
Example #14
def pretty_dump(data):
    return json.dumps(data, indent=2, separators=(',', ': '))
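
For reference, the two-space indent and tightened separators produce output like this:

print(pretty_dump({"a": 1, "b": [2, 3]}))
# {
#   "a": 1,
#   "b": [
#     2,
#     3
#   ]
# }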
Example #15
def return_counts(response):
    counter = traced_counter.get()
    if counter and isinstance(counter, Counter):
        response.headers["x-dagster-call-counts"] = json.dumps(
            counter.counts())
    return response
Example #16
    def construct_job(self, run):
        check.inst_param(run, 'run', PipelineRun)

        dagster_labels = {
            'app.kubernetes.io/name': 'dagster',
            'app.kubernetes.io/instance': 'dagster',
            'app.kubernetes.io/version': dagster_version,
        }

        init_container = client.V1Container(
            name='check-db-ready',
            image='postgres:9.6.16',
            command=[
                'sh',
                '-c',
                'until pg_isready -h {pg_host} -p {pg_port}; '
                'do echo waiting for database; sleep 2; done;'.format(
                    pg_host=self.postgres_host,
                    pg_port=self.postgres_port,
                ),
            ],
        )

        execution_params = {
            'executionParams': {
                'selector': run.selector.to_graphql_input(),
                "environmentConfigData": run.environment_dict,
                "mode": run.mode,
            }
        }

        job_container = client.V1Container(
            name='dagster-job-%s' % run.run_id,
            image=self.job_image,
            command=['dagster-graphql'],
            args=[
                "-p", "startPipelineExecution", "-v",
                json.dumps(execution_params)
            ],
            image_pull_policy='IfNotPresent',
            env=[
                client.V1EnvVar(name='DAGSTER_HOME',
                                value='/opt/dagster/dagster_home')
            ],
            volume_mounts=[
                client.V1VolumeMount(
                    name='dagster-instance',
                    mount_path='/opt/dagster/dagster_home/dagster.yaml',
                    sub_path='dagster.yaml',
                )
            ],
        )

        config_map_volume = client.V1Volume(
            name='dagster-instance',
            config_map=client.V1ConfigMapVolumeSource(name='dagster-instance'),
        )

        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(name='dagster-job-pod-%s' %
                                         run.run_id,
                                         labels=dagster_labels),
            spec=client.V1PodSpec(
                image_pull_secrets=self.image_pull_secrets,
                service_account_name=self.service_account_name,
                init_containers=[init_container],
                restart_policy='Never',
                containers=[job_container],
                volumes=[config_map_volume],
            ),
        )

        job = client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=client.V1ObjectMeta(name='dagster-job-%s' % run.run_id,
                                         labels=dagster_labels),
            spec=client.V1JobSpec(template=template, backoff_limit=4),
        )
        return job