Example No. 1
0
    def test_3_submit_minimal_job(self):
        """Submit a minimal Spark job via Luigi and verify its submission.

        The job itself will fail because the job files don't exist. We don't
        care, because then we would be testing Spark itself — we only care
        that the job was submitted correctly, so that's what we assert on.
        """
        luigi.run(['--local-scheduler',
                   '--no-lock',
                   'DataprocSparkTask',
                   '--gcloud-project-id=' + PROJECT_ID,
                   '--dataproc-cluster-name=' + CLUSTER_NAME,
                   '--main-class=my.MinimalMainClass'])

        # Query the Dataproc jobs API for this cluster; the test relies on the
        # most recent submission being first in the returned list.
        response = dataproc.get_dataproc_client().projects().regions().jobs() \
            .list(projectId=PROJECT_ID, region=REGION, clusterName=CLUSTER_NAME).execute()
        # PEP 8: snake_case for local names (was camelCase `lastJob`).
        last_job = response['jobs'][0]['sparkJob']

        self.assertEqual(last_job['mainClass'], "my.MinimalMainClass")
Example No. 2
0
    def test_5_submit_pyspark_job(self):
        """Submit a PySpark job via Luigi and verify its submitted fields.

        The job itself will fail because the job files don't exist. That is
        fine — otherwise we would be testing PySpark itself. We only check
        that the job reached the Dataproc API with the right parameters.
        """
        task_args = [
            '--local-scheduler',
            '--no-lock',
            'DataprocPysparkTask',
            '--gcloud-project-id=' + PROJECT_ID,
            '--dataproc-cluster-name=' + CLUSTER_NAME,
            '--job-file=main_job.py',
            '--extra-files=extra1.py,extra2.py',
            '--job-args=foo,bar',
        ]
        luigi.run(task_args)

        # The most recently submitted job comes first in the API listing.
        jobs_api = dataproc.get_dataproc_client().projects().regions().jobs()
        listing = jobs_api.list(
            projectId=PROJECT_ID, region=REGION, clusterName=CLUSTER_NAME
        ).execute()
        submitted = listing['jobs'][0]['pysparkJob']

        self.assertEqual(submitted['mainPythonFileUri'], "main_job.py")
        self.assertEqual(submitted['pythonFileUris'], ["extra1.py", "extra2.py"])
        self.assertEqual(submitted['args'], ["foo", "bar"])
Example No. 3
0
    def test_5_submit_pyspark_job(self):
        """Submit a PySpark job via Luigi and verify it was submitted correctly.

        The job itself will fail because the job files don't exist. We don't
        care, because then we would be testing PySpark itself — we only care
        that the job was submitted correctly, so that's what we assert on.
        """
        luigi.run(['--local-scheduler',
                   '--no-lock',
                   'DataprocPysparkTask',
                   '--gcloud-project-id=' + PROJECT_ID,
                   '--dataproc-cluster-name=' + CLUSTER_NAME,
                   '--job-file=main_job.py',
                   '--extra-files=extra1.py,extra2.py',
                   '--job-args=foo,bar'])

        # Query the Dataproc jobs API for this cluster; the test relies on the
        # most recent submission being first in the returned list.
        response = dataproc.get_dataproc_client().projects().regions().jobs()\
            .list(projectId=PROJECT_ID, region=REGION, clusterName=CLUSTER_NAME).execute()
        # PEP 8: snake_case for local names (was camelCase `lastJob`).
        last_job = response['jobs'][0]['pysparkJob']

        self.assertEqual(last_job['mainPythonFileUri'], "main_job.py")
        self.assertEqual(last_job['pythonFileUris'], ["extra1.py", "extra2.py"])
        self.assertEqual(last_job['args'], ["foo", "bar"])