def test_3_submit_minimal_job(self):
    """Submit a minimal Spark job and check it was submitted correctly.

    The job itself will fail on the cluster because its files do not
    exist -- that would be testing Spark, which we don't care about.
    We only verify that the submission carried the right main class.
    """
    cli_args = [
        '--local-scheduler', '--no-lock', 'DataprocSparkTask',
        '--gcloud-project-id=' + PROJECT_ID,
        '--dataproc-cluster-name=' + CLUSTER_NAME,
        '--main-class=my.MinimalMainClass',
    ]
    luigi.run(cli_args)

    # Ask the Dataproc API for the jobs on the cluster; newest first.
    jobs_api = dataproc.get_dataproc_client().projects().regions().jobs()
    listing = jobs_api.list(projectId=PROJECT_ID, region=REGION,
                            clusterName=CLUSTER_NAME).execute()
    submitted_job = listing['jobs'][0]['sparkJob']
    self.assertEqual(submitted_job['mainClass'], "my.MinimalMainClass")
def test_5_submit_pyspark_job(self):
    """Submit a PySpark job and check it was submitted correctly.

    The job itself will fail on the cluster because its files do not
    exist -- that would be testing PySpark, which we don't care about.
    We only verify that the submission carried the right file, extra
    files, and arguments.
    """
    cli_args = [
        '--local-scheduler', '--no-lock', 'DataprocPysparkTask',
        '--gcloud-project-id=' + PROJECT_ID,
        '--dataproc-cluster-name=' + CLUSTER_NAME,
        '--job-file=main_job.py',
        '--extra-files=extra1.py,extra2.py',
        '--job-args=foo,bar',
    ]
    luigi.run(cli_args)

    # Ask the Dataproc API for the jobs on the cluster; newest first.
    jobs_api = dataproc.get_dataproc_client().projects().regions().jobs()
    listing = jobs_api.list(projectId=PROJECT_ID, region=REGION,
                            clusterName=CLUSTER_NAME).execute()
    submitted_job = listing['jobs'][0]['pysparkJob']
    self.assertEqual(submitted_job['mainPythonFileUri'], "main_job.py")
    self.assertEqual(submitted_job['pythonFileUris'], ["extra1.py", "extra2.py"])
    self.assertEqual(submitted_job['args'], ["foo", "bar"])
# NOTE(review): this method is a byte-for-byte duplicate of the earlier
# test_5_submit_pyspark_job; because it has the same name, it redefines
# (shadows) that one, so only one copy is ever collected and run.
# This looks like a copy-paste where the name/content was never updated --
# presumably one of the two was meant to be a different test (e.g. a
# test_4 covering DataprocSparkTask submission). TODO confirm intent and
# either rename this test with its intended content or delete it.
def test_5_submit_pyspark_job(self): # The job itself will fail because the job files don't exist # We don't care, because then we would be testing pyspark # We care the job was submitted correctly, so that's what we test luigi.run(['--local-scheduler', '--no-lock', 'DataprocPysparkTask', '--gcloud-project-id=' + PROJECT_ID, '--dataproc-cluster-name=' + CLUSTER_NAME, '--job-file=main_job.py', '--extra-files=extra1.py,extra2.py', '--job-args=foo,bar']) response = dataproc.get_dataproc_client().projects().regions().jobs()\ .list(projectId=PROJECT_ID, region=REGION, clusterName=CLUSTER_NAME).execute() lastJob = response['jobs'][0]['pysparkJob'] self.assertEqual(lastJob['mainPythonFileUri'], "main_job.py") self.assertEqual(lastJob['pythonFileUris'], ["extra1.py", "extra2.py"]) self.assertEqual(lastJob['args'], ["foo", "bar"])