Example #1
0
 def _setup_steps(self):
     """Return the setup steps: a list holding one ``InstallPigStep``."""
     install_pig = InstallPigStep()
     return [install_pig]
Example #2
0
# Launch a sample EMR job flow that installs Pig and then runs the Apache
# log-report Pig script against the sample input data.
#
# `conn`, `instance_groups`, `PigStep` and `InstallPigStep` are not defined
# in this snippet; they must be provided by the surrounding code.

pig_file = 's3://elasticmapreduce/samples/pig-apache/do-reports2.pig'
INPUT = 's3://elasticmapreduce/samples/pig-apache/input/'

# Suffix the output prefix with the current Unix timestamp (whole seconds)
# so that separate runs get distinct output prefixes.  The epoch is computed
# explicitly: strftime("%s") is an undocumented, platform-specific directive
# and it interprets the naive UTC datetime as local time, which skews the
# value on machines whose timezone is not UTC.
OUTPUT = ('s3://org.unencrypted.emr.output/apache_sample/%d' %
          (datetime.datetime.utcnow() -
           datetime.datetime(1970, 1, 1)).total_seconds())

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("""\
Running pig job with settings:

    SCRIPT={script}
    INPUT={input}
    OUTPUT={output}
""".format(script=pig_file, input=INPUT, output=OUTPUT))

# Each '-p NAME=value' pair is handed to the Pig step as an argument; the
# names INPUT and OUTPUT are presumably the parameters the script reads
# (verify against do-reports2.pig).
pig_args = ['-p', 'INPUT=%s' % INPUT,
            '-p', 'OUTPUT=%s' % OUTPUT]

# Pig is installed first, then the report script runs.
pig_step = PigStep('Process Reports', pig_file, pig_args=pig_args)
steps = [InstallPigStep(), pig_step]

# NOTE(review): ec2_keyname is None when EC2_KEY_NAME is unset.
# NOTE(review): keep_alive=True presumably leaves the job flow running after
# the steps finish, so it has to be terminated explicitly -- confirm against
# the boto run_jobflow documentation.
job_id = conn.run_jobflow(
    name='sample apache report',
    ec2_keyname=os.getenv("EC2_KEY_NAME"),
    steps=steps,
    log_uri="s3://org.unencrypted.emr.log/sampleflow_logs",
    enable_debugging=True,
    ami_version="latest",
    instance_groups=instance_groups,
    keep_alive=True)

print(job_id)