예제 #1
0
    def dumpEMRClusters(self):
        """Dump information about the EMR job flows and clusters.

        Connects with the profile in ``self.botoprfl`` unless its first
        element is ``"default"``, in which case ``boto.connect_emr()`` is
        called without a profile.  The job flows are printed directly and
        every cluster returned by ``list_clusters()`` is passed to
        ``self.opygenericroutines.prntLogErrWarnInfo`` at ``'info'`` level.

        Any ``Exception`` raised while connecting or listing is formatted
        together with ``self.sclsnme`` and handed to ``prntErrWarnInfo``.
        """
        try:
            if self.botoprfl[0] != "default":
                conn = boto.connect_emr(profile_name=self.botoprfl)
            else:
                conn = boto.connect_emr()
            if conn:
                print("\n<Start of EMR clusters>\n")
                print(" Jobflows: %s" % conn.describe_jobflows())
                # Empty 'info' entries frame the cluster listing.
                self.opygenericroutines.prntLogErrWarnInfo('',
                                                           'info',
                                                           bresume=True)
                for c in conn.list_clusters().clusters:
                    ec = " %s" % c
                    self.opygenericroutines.prntLogErrWarnInfo(str(ec),
                                                               'info',
                                                               bresume=True)
                self.opygenericroutines.prntLogErrWarnInfo('',
                                                           'info',
                                                           bresume=True)
                print("\n<End of EMR clusters>\n")
        # "except X as e" works on Python 2.6+ and Python 3; the older
        # "except X, e" spelling is a syntax error on Python 3.
        except Exception as e:
            serr = (
                '%s :: dumpEMRClusters(...) : connect_emr,list_clusters(...).clusters, '
                '%s' % (self.sclsnme, str(e)))
            prntErrWarnInfo(serr, bresume=True)
예제 #2
0
def test_create_instance_groups():
    """An instance group added to a job flow shows up in describe_jobflows."""
    emr = boto.connect_emr()

    wordcount_step = StreamingStep(
        name="My wordcount example",
        mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter.py",
        reducer="aggregate",
        input="s3n://elasticmapreduce/samples/wordcount/input",
        output="s3n://output_bucket/output/wordcount_output",
    )
    flow_id = emr.run_jobflow(
        name="My jobflow",
        log_uri="s3://some_bucket/jobflow_logs",
        steps=[wordcount_step],
    )

    requested = InstanceGroup(6, "TASK", "c1.medium", "SPOT", "spot-0.07", "0.07")
    added = emr.add_instance_groups(flow_id, [requested])
    expected_group_id = added.instancegroupids

    described = emr.describe_jobflows()[0]
    int(described.instancecount).should.equal(6)

    group = described.instancegroups[0]
    group.instancegroupid.should.equal(expected_group_id)
    int(group.instancerunningcount).should.equal(6)

    # Remaining attributes are plain strings; check them in declaration order.
    expected_fields = (
        ("instancerole", "TASK"),
        ("instancetype", "c1.medium"),
        ("market", "SPOT"),
        ("name", "spot-0.07"),
        ("bidprice", "0.07"),
    )
    for field, wanted in expected_fields:
        getattr(group, field).should.equal(wanted)
예제 #3
0
	def get_elapsed_time_emr(job, emrid):
		"""Get elapsed time for EMR job with job flow id emrid, based on EMR job information.

		The steps whose name carries ``job.id`` as its second "-"-separated
		field delimit the interval: creation time of the first such step to
		end time of the last one.  If a step name has no second field
		(IndexError), the interval is instead derived from the job flow's
		trailing step(s), falling back to the job flow start time and to the
		current time when those attributes are missing.

		Returns the difference ``endtime - starttime`` (a datetime.timedelta).
		"""
		time_format = '%Y-%m-%dT%H:%M:%SZ'

		def parse(stamp):
			return datetime.datetime.strptime(stamp, time_format)

		emr = boto.connect_emr()
		try:
			jobflow = emr.describe_jobflow(emrid)
		finally:
			# Release the connection even when the lookup itself fails.
			emr.close()

		try:
			steps = [s for s in jobflow.steps if int(s.name.split("-")[1]) == job.id]
		except IndexError:
			try:
				# Skip a trailing "SimpleJoin" step when picking the start step.
				stepcount = -2 if jobflow.steps[-1].name == "SimpleJoin" else -1
				starttime = parse(jobflow.steps[stepcount].creationdatetime)
			except AttributeError:
				starttime = parse(jobflow.startdatetime)
			except Exception:
				# e.g. there is no second-to-last step; use the last one.
				starttime = parse(jobflow.steps[-1].creationdatetime)

			try:
				endtime = parse(jobflow.steps[-1].enddatetime)
			except AttributeError:
				# No end time recorded (presumably still running): measure up to now.
				endtime = datetime.datetime.today()
			# Any other error propagates; the former bare "except" merely
			# re-ran the very statement that had just failed.
		else:
			starttime = parse(steps[0].creationdatetime)
			endtime = parse(steps[-1].enddatetime)

		return endtime - starttime
예제 #4
0
def test_bootstrap_actions():
    """Bootstrap actions given to run_jobflow are reported back unchanged."""
    first_action = BootstrapAction(
        name="bs1",
        path="path/to/script",
        bootstrap_action_args=["arg1", "arg2&arg3"],
    )
    second_action = BootstrapAction(
        name="bs2",
        path="path/to/anotherscript",
        bootstrap_action_args=[],
    )
    requested = [first_action, second_action]

    emr = boto.connect_emr()
    flow_id = emr.run_jobflow(bootstrap_actions=requested, **run_jobflow_args)

    # View through describe_jobflow.
    described = emr.describe_jobflow(flow_id)
    for reported, wanted in zip(described.bootstrapactions, requested):
        reported.name.should.equal(wanted.name)
        reported.path.should.equal(wanted.path)
        reported_args = [item.value for item in reported.args]
        reported_args.should.equal(wanted.args())

    # View through list_bootstrap_actions (script path attribute differs).
    listing = emr.list_bootstrap_actions(flow_id)
    for position, wanted in enumerate(requested):
        reported = listing.actions[position]
        reported.name.should.equal(wanted.name)
        reported.scriptpath.should.equal(wanted.path)
        reported_args = [item.value for item in reported.args]
        reported_args.should.equal(wanted.args())
예제 #5
0
def test_run_jobflow_with_visible_to_all_users():
    """The visibility flag passed to run_jobflow is echoed in lower case."""
    emr = boto.connect_emr()
    for flag in (True, False):
        flow_id = emr.run_jobflow(visible_to_all_users=flag, **run_jobflow_args)
        described = emr.describe_jobflow(flow_id)
        # The flag is reported as the string "true" / "false".
        wanted = str(flag).lower()
        described.visibletoallusers.should.equal(wanted)
예제 #6
0
def test_bootstrap_actions():
    """Check bootstrap actions via describe_jobflow and list_bootstrap_actions."""
    wanted_actions = [
        BootstrapAction(name='bs1',
                        path='path/to/script',
                        bootstrap_action_args=['arg1', 'arg2&arg3']),
        BootstrapAction(name='bs2',
                        path='path/to/anotherscript',
                        bootstrap_action_args=[]),
    ]

    client = boto.connect_emr()
    job_flow_id = client.run_jobflow(bootstrap_actions=wanted_actions,
                                     **run_jobflow_args)

    def values_of(action):
        # Collect the plain argument values of a reported action.
        return [entry.value for entry in action.args]

    flow = client.describe_jobflow(job_flow_id)
    for actual, wanted in zip(flow.bootstrapactions, wanted_actions):
        actual.name.should.equal(wanted.name)
        actual.path.should.equal(wanted.path)
        values_of(actual).should.equal(wanted.args())

    listed = client.list_bootstrap_actions(job_flow_id)
    for idx, wanted in enumerate(wanted_actions):
        actual = listed.actions[idx]
        actual.name.should.equal(wanted.name)
        actual.scriptpath.should.equal(wanted.path)
        values_of(actual).should.equal(wanted.args())
예제 #7
0
파일: test_emr.py 프로젝트: joshp123/moto
def test_create_instance_groups():
    """Add a single SPOT task group and verify how the job flow reports it."""
    client = boto.connect_emr()

    streaming_step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output',
    )
    job_flow_id = client.run_jobflow(name='My jobflow',
                                     log_uri='s3://some_bucket/jobflow_logs',
                                     steps=[streaming_step])

    new_group = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
    add_response = client.add_instance_groups(job_flow_id, [new_group])
    new_group_id = add_response.instancegroupids

    flow = client.describe_jobflows()[0]
    int(flow.instancecount).should.equal(6)

    reported = flow.instancegroups[0]
    reported.instancegroupid.should.equal(new_group_id)
    int(reported.instancerunningcount).should.equal(6)
    # String attributes, checked in a fixed order.
    for attribute, wanted in (('instancerole', 'TASK'),
                              ('instancetype', 'c1.medium'),
                              ('market', 'SPOT'),
                              ('name', 'spot-0.07'),
                              ('bidprice', '0.07')):
        getattr(reported, attribute).should.equal(wanted)
예제 #8
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_create_instance_groups():
    """A newly added instance group is visible on the described job flow."""
    connection = boto.connect_emr()

    step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output',
    )
    run_kwargs = dict(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[step],
    )
    flow_id = connection.run_jobflow(**run_kwargs)

    # Six c1.medium SPOT task instances, named after the bid price.
    spot_group = InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07',
                               '0.07')
    response = connection.add_instance_groups(flow_id, [spot_group])
    group_id = response.instancegroupids

    flow = connection.describe_jobflows()[0]
    int(flow.instancecount).should.equal(6)

    group = flow.instancegroups[0]
    group.instancegroupid.should.equal(group_id)
    int(group.instancerunningcount).should.equal(6)
    group.instancerole.should.equal('TASK')
    group.instancetype.should.equal('c1.medium')
    group.market.should.equal('SPOT')
    group.name.should.equal('spot-0.07')
    group.bidprice.should.equal('0.07')
예제 #9
0
def test_modify_instance_groups():
    """Resizing two task groups changes the per-group and total counts."""
    emr = boto.connect_emr()

    wordcount_step = StreamingStep(
        name="My wordcount example",
        mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter.py",
        reducer="aggregate",
        input="s3n://elasticmapreduce/samples/wordcount/input",
        output="s3n://output_bucket/output/wordcount_output",
    )
    flow_id = emr.run_jobflow(
        name="My jobflow",
        log_uri="s3://some_bucket/jobflow_logs",
        steps=[wordcount_step],
    )

    first_group = InstanceGroup(6, "TASK", "c1.medium", "SPOT", "spot-0.07", "0.07")
    second_group = InstanceGroup(6, "TASK", "c1.medium", "SPOT", "spot-0.07", "0.07")
    added = emr.add_instance_groups(flow_id, [first_group, second_group])
    # The ids of the added groups come back as one comma-separated string.
    group_ids = added.instancegroupids.split(",")

    before = emr.describe_jobflows()[0]
    int(before.instancecount).should.equal(12)
    leading_group = before.instancegroups[0]
    int(leading_group.instancerunningcount).should.equal(6)

    emr.modify_instance_groups(group_ids, [2, 3])

    after = emr.describe_jobflows()[0]
    int(after.instancecount).should.equal(5)

    def running_count(group_id):
        # Running-instance count of the group carrying the given id.
        matches = [g for g in after.instancegroups if g.instancegroupid == group_id]
        return int(matches[0].instancerunningcount)

    running_count(group_ids[0]).should.equal(2)
    running_count(group_ids[1]).should.equal(3)
예제 #10
0
파일: test_emr.py 프로젝트: joshp123/moto
def test_add_steps_to_flow():
    """A step added after launch is listed behind the original step."""
    emr = boto.connect_emr()

    first_step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output',
    )
    flow_id = emr.run_jobflow(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[first_step],
    )

    launched = emr.describe_jobflow(flow_id)
    launched.state.should.equal('STARTING')
    launched.jobflowid.should.equal(flow_id)
    launched.name.should.equal('My jobflow')
    launched.loguri.should.equal('s3://some_bucket/jobflow_logs')

    second_step = StreamingStep(
        name='My wordcount example2',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input2',
        output='s3n://output_bucket/output/wordcount_output2',
    )
    emr.add_jobflow_steps(flow_id, [second_step])

    updated = emr.describe_jobflow(flow_id)

    def check_step(step, wanted_name, wanted_state, wanted_args):
        # Name, state and streaming arguments of one reported step.
        step.name.should.equal(wanted_name)
        step.state.should.equal(wanted_state)
        reported_args = [item.value for item in step.args]
        reported_args.should.equal(wanted_args)

    check_step(updated.steps[0], 'My wordcount example', 'STARTING', [
        '-mapper',
        's3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        '-reducer',
        'aggregate',
        '-input',
        's3n://elasticmapreduce/samples/wordcount/input',
        '-output',
        's3n://output_bucket/output/wordcount_output',
    ])
    check_step(updated.steps[1], 'My wordcount example2', 'PENDING', [
        '-mapper',
        's3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
        '-reducer',
        'aggregate',
        '-input',
        's3n://elasticmapreduce/samples/wordcount/input2',
        '-output',
        's3n://output_bucket/output/wordcount_output2',
    ])
예제 #11
0
def test_terminate_job_flow():
    """A STARTING job flow becomes TERMINATED after terminate_jobflow."""
    emr = boto.connect_emr()
    flow_id = emr.run_jobflow(
        name="My jobflow",
        log_uri="s3://some_bucket/jobflow_logs",
        steps=[],
    )

    before = emr.describe_jobflows()[0]
    before.state.should.equal("STARTING")

    emr.terminate_jobflow(flow_id)

    after = emr.describe_jobflows()[0]
    after.state.should.equal("TERMINATED")
예제 #12
0
def test_run_jobflow_with_visible_to_all_users():
    """run_jobflow honours visible_to_all_users for both True and False."""
    client = boto.connect_emr()
    for visible in (True, False):
        job_flow_id = client.run_jobflow(visible_to_all_users=visible,
                                         **run_jobflow_args)
        reported = client.describe_jobflow(job_flow_id).visibletoallusers
        # Reported as the lower-cased string form of the boolean.
        reported.should.equal(str(visible).lower())
예제 #13
0
def test_terminate_jobflow():
    """A WAITING job flow is reported as TERMINATED once terminated."""
    emr = boto.connect_emr()
    flow_id = emr.run_jobflow(**run_jobflow_args)

    state_before = emr.describe_jobflows()[0].state
    state_before.should.equal("WAITING")

    emr.terminate_jobflow(flow_id)

    state_after = emr.describe_jobflows()[0].state
    state_after.should.equal("TERMINATED")
예제 #14
0
파일: test_emr.py 프로젝트: rocky4570/moto
def test_terminate_jobflow():
    """terminate_jobflow moves the only job flow from WAITING to TERMINATED."""
    client = boto.connect_emr()
    job_flow_id = client.run_jobflow(**run_jobflow_args)

    def current_state():
        # State of the first (and only) job flow known to the connection.
        return client.describe_jobflows()[0].state

    current_state().should.equal("WAITING")
    client.terminate_jobflow(job_flow_id)
    current_state().should.equal("TERMINATED")
예제 #15
0
def run_tests(things, tests):
    """Run the given tests on an EMR job flow and collect the output.

    Args:
        things: Indexable of keys inside ``settings.emr_bucket``.  Items 0
            and 1 are passed to the jar step as HTTP URLs, item 2 is the jar
            to run and item 3 is the bootstrap ("setup") script.
        tests: Non-empty sequence of test names; it is expanded with
            ``fix_suites`` and passed to the step as one comma-joined string.

    Raises:
        Exception: If ``tests`` is empty.
    """
    if len(tests) == 0:
        raise Exception("no tests")
    oldNum = len(tests)
    tests = fix_suites(tests)
    print("tests expanded from %d to %d" % (oldNum, len(tests)))

    print("things:%s\ntests:%s\n" % (things, tests))

    emr = boto.connect_emr(settings.emr_id, settings.emr_key)

    def http(path):
        return "http://%s.s3.amazonaws.com/%s" % (settings.emr_bucket, path)

    # Per-run prefix: emr/<user>/<host>/<timestamp>/
    run_s3_path = "emr/%s/%s/%s/" % (os.getenv("USER"), os.getenv(
        "HOST"), datetime.datetime.today().strftime("%Y%m%d-%H%M"))

    run_s3_root = "s3n://%s/%s/" % (settings.emr_bucket, run_s3_path)

    out = run_s3_root + "out"
    logs = run_s3_root + "logs"

    jar = "s3n://%s/%s" % (settings.emr_bucket, things[2])
    step_args = [http(things[0]), http(things[1]), out, ",".join(tests)]

    step = boto.emr.step.JarStep("emr main", jar=jar, step_args=step_args)
    print("jar:%s\nargs:%s" % (jar, step_args))

    setup = boto.emr.BootstrapAction(
        "setup", "s3n://%s/%s" % (settings.emr_bucket, things[3]), [])

    jobid = emr.run_jobflow(name="Mongo EMR for %s from %s" %
                            (os.getenv("USER"), os.getenv("HOST")),
                            ec2_keyname="emr1",
                            slave_instance_type="m1.large",
                            ami_version="latest",
                            num_instances=5,
                            log_uri=logs,
                            bootstrap_actions=[setup],
                            steps=[step])

    print("%s jobid: %s" % (datetime.datetime.today(), jobid))

    # Poll every 30 seconds until the job flow reaches a final state.
    # TERMINATED has to count as final as well: a flow that was shut down
    # never becomes COMPLETED or FAILED, so the loop would spin forever.
    final_states = ("COMPLETED", "FAILED", "TERMINATED")
    while True:
        flow = emr.describe_jobflow(jobid)
        print("%s status: %s" % (datetime.datetime.today(), flow.state))
        if flow.state in final_states:
            break
        time.sleep(30)

    # The run's S3 prefix is synced into a local directory named after the job.
    final_out = "build/emrout/" + jobid + "/"
    sync_s3(run_s3_path, final_out)

    print("output in: " + final_out)
    do_output(final_out)
예제 #16
0
파일: emr.py 프로젝트: 10genReviews/mongo
def run_tests(things, tests):
    """Launch an EMR job flow that runs ``tests`` and fetch its results.

    Args:
        things: Indexable of keys inside ``settings.emr_bucket``: items 0 and
            1 are handed to the jar step as HTTP URLs, item 2 names the jar
            and item 3 the bootstrap ("setup") script.
        tests: Non-empty sequence of test names.  It is expanded through
            ``fix_suites`` and given to the step as a comma-joined string.

    Raises:
        Exception: If ``tests`` is empty.
    """
    if len(tests) == 0:
        raise Exception("no tests")
    oldNum = len(tests)
    tests = fix_suites(tests)
    print("tests expanded from %d to %d" % (oldNum, len(tests)))

    print("things:%s\ntests:%s\n" % (things, tests))

    emr = boto.connect_emr(settings.emr_id, settings.emr_key)

    def http(path):
        return "http://%s.s3.amazonaws.com/%s" % (settings.emr_bucket, path)

    # Per-run prefix: emr/<user>/<host>/<timestamp>/
    run_s3_path = "emr/%s/%s/%s/" % (
        os.getenv("USER"),
        os.getenv("HOST"),
        datetime.datetime.today().strftime("%Y%m%d-%H%M"))

    run_s3_root = "s3n://%s/%s/" % (settings.emr_bucket, run_s3_path)

    out = run_s3_root + "out"
    logs = run_s3_root + "logs"

    jar = "s3n://%s/%s" % (settings.emr_bucket, things[2])
    step_args = [http(things[0]), http(things[1]), out, ",".join(tests)]

    step = boto.emr.step.JarStep("emr main", jar=jar, step_args=step_args)
    print("jar:%s\nargs:%s" % (jar, step_args))

    setup = boto.emr.BootstrapAction(
        "setup", "s3n://%s/%s" % (settings.emr_bucket, things[3]), [])

    jobid = emr.run_jobflow(
        name="Mongo EMR for %s from %s" % (os.getenv("USER"), os.getenv("HOST")),
        ec2_keyname="emr1",
        slave_instance_type="m1.large",
        ami_version="latest",
        num_instances=5,
        log_uri=logs,
        bootstrap_actions=[setup],
        steps=[step])

    print("%s jobid: %s" % (datetime.datetime.today(), jobid))

    # Poll every 30 seconds until a final state is reached.  TERMINATED is
    # treated as final too; without it a job flow that was shut down would
    # keep this loop running forever.
    while True:
        flow = emr.describe_jobflow(jobid)
        print("%s status: %s" % (datetime.datetime.today(), flow.state))
        if flow.state in ("COMPLETED", "FAILED", "TERMINATED"):
            break
        time.sleep(30)

    # Sync the run's S3 prefix into a local directory named after the job.
    final_out = "build/emrout/" + jobid + "/"
    sync_s3(run_s3_path, final_out)

    print("output in: " + final_out)
    do_output(final_out)
예제 #17
0
파일: test_emr.py 프로젝트: joshp123/moto
def test_terminate_job_flow():
    """Terminating a freshly started job flow flips its state to TERMINATED."""
    client = boto.connect_emr()
    launch_kwargs = dict(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[],
    )
    job_flow_id = client.run_jobflow(**launch_kwargs)

    started = client.describe_jobflows()[0]
    started.state.should.equal('STARTING')

    client.terminate_jobflow(job_flow_id)

    stopped = client.describe_jobflows()[0]
    stopped.state.should.equal('TERMINATED')
예제 #18
0
파일: test_emr.py 프로젝트: joshp123/moto
def test_create_job_flow_visible_to_all_users():
    """A job flow created with visible_to_all_users=True reports 'True'."""
    emr = boto.connect_emr()
    flow_id = emr.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[],
                              visible_to_all_users=True)

    described = emr.describe_jobflow(flow_id)
    described.visibletoallusers.should.equal('True')
예제 #19
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_create_job_flow_visible_to_all_users():
    """The visible_to_all_users flag survives a run/describe round trip."""
    client = boto.connect_emr()
    launch_kwargs = dict(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[],
        visible_to_all_users=True,
    )
    job_flow_id = client.run_jobflow(**launch_kwargs)

    visibility = client.describe_jobflow(job_flow_id).visibletoallusers
    visibility.should.equal('True')
예제 #20
0
def test_create_job_flow_visible_to_all_users():
    """Create a job flow with a role and the visibility flag, then read it back."""
    connection = boto.connect_emr()
    created_id = connection.run_jobflow(
        name="My jobflow",
        log_uri="s3://some_bucket/jobflow_logs",
        job_flow_role="some-role-arn",
        steps=[],
        visible_to_all_users=True,
    )

    created = connection.describe_jobflow(created_id)
    # The flag is compared against the string "True".
    created.visibletoallusers.should.equal("True")
예제 #21
0
파일: test_emr.py 프로젝트: joshp123/moto
def test_create_job_flow_with_new_params():
    """run_jobflow accepts the newer instance-type and role parameters."""
    newer_params = dict(
        master_instance_type='m1.medium',
        slave_instance_type='m1.small',
        job_flow_role='some-role-arn',
    )
    emr = boto.connect_emr()

    # Only the call itself is exercised; the returned id is not inspected.
    emr.run_jobflow(name='My jobflow',
                    log_uri='s3://some_bucket/jobflow_logs',
                    steps=[],
                    **newer_params)
예제 #22
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_describe_cluster():
    """describe_cluster reports name, instance hours and state of a new flow."""
    emr = boto.connect_emr()
    flow_id = emr.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[])

    described = emr.describe_cluster(flow_id)
    described.name.should.equal("My jobflow")
    described.normalizedinstancehours.should.equal('0')
    # The state lives on the nested status object.
    described.status.state.should.equal("RUNNING")
예제 #23
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_create_job_flow_with_new_params():
    """Smoke test: run_jobflow works when given the newer parameters."""
    client = boto.connect_emr()
    launch_kwargs = {
        'name': 'My jobflow',
        'log_uri': 's3://some_bucket/jobflow_logs',
        'master_instance_type': 'm1.medium',
        'slave_instance_type': 'm1.small',
        'job_flow_role': 'some-role-arn',
        'steps': [],
    }
    # No assertions follow; the call is only expected not to raise.
    client.run_jobflow(**launch_kwargs)
예제 #24
0
def test_describe_cluster():
    """A job flow started via run_jobflow can be looked up with describe_cluster."""
    client = boto.connect_emr()
    launch_kwargs = dict(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[],
    )
    cluster_id = client.run_jobflow(**launch_kwargs)

    info = client.describe_cluster(cluster_id)
    info.name.should.equal("My jobflow")
    info.normalizedinstancehours.should.equal('0')
    cluster_state = info.status.state
    cluster_state.should.equal("RUNNING")
예제 #25
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_terminate_job_flow():
    """The first described job flow goes from STARTING to TERMINATED."""
    connection = boto.connect_emr()
    flow_id = connection.run_jobflow(name='My jobflow',
                                     log_uri='s3://some_bucket/jobflow_logs',
                                     steps=[])

    def first_flow_state():
        # State of the first job flow returned by describe_jobflows().
        return connection.describe_jobflows()[0].state

    first_flow_state().should.equal('STARTING')
    connection.terminate_jobflow(flow_id)
    first_flow_state().should.equal('TERMINATED')
예제 #26
0
파일: test_emr.py 프로젝트: rocky4570/moto
def test_set_termination_protection():
    """Termination protection starts off and follows each toggle."""
    emr = boto.connect_emr()
    flow_id = emr.run_jobflow(**run_jobflow_args)

    def protection():
        # Re-describe the job flow so every check sees fresh data.
        return emr.describe_jobflow(flow_id).terminationprotected

    protection().should.equal("false")

    for enabled, reported in ((True, "true"), (False, "false")):
        emr.set_termination_protection(flow_id, enabled)
        protection().should.equal(reported)
예제 #27
0
def step_completed(emrid):
	"""Check if EMR job with jobflow id emrid has completed.

	Looks only at the last step of the job flow and returns True when that
	step's state is "COMPLETED", False otherwise.  An IndexError is raised
	if the job flow has no steps.
	"""
	emr = boto.connect_emr()
	try:
		step = emr.describe_jobflow(emrid).steps[-1]
	finally:
		# Close the connection even if the lookup or the indexing fails.
		emr.close()

	return step.state == "COMPLETED"
예제 #28
0
def test_set_termination_protection():
    """set_termination_protection is reflected by describe_jobflow."""
    client = boto.connect_emr()
    job_flow_id = client.run_jobflow(**run_jobflow_args)

    # Protection is reported as "false" for a new job flow.
    initial = client.describe_jobflow(job_flow_id)
    initial.terminationprotected.should.equal("false")

    client.set_termination_protection(job_flow_id, True)
    protected = client.describe_jobflow(job_flow_id)
    protected.terminationprotected.should.equal("true")

    client.set_termination_protection(job_flow_id, False)
    unprotected = client.describe_jobflow(job_flow_id)
    unprotected.terminationprotected.should.equal("false")
예제 #29
0
파일: test_emr.py 프로젝트: rocky4570/moto
def test_run_jobflow():
    """A job flow launched from run_jobflow_args reports the same settings."""
    emr = boto.connect_emr()
    launch_args = run_jobflow_args.copy()
    flow_id = emr.run_jobflow(**launch_args)

    described = emr.describe_jobflow(flow_id)
    described.state.should.equal("WAITING")
    described.jobflowid.should.equal(flow_id)

    # Values that must mirror the launch arguments.
    described.name.should.equal(launch_args["name"])
    described.masterinstancetype.should.equal(launch_args["master_instance_type"])
    described.slaveinstancetype.should.equal(launch_args["slave_instance_type"])
    described.loguri.should.equal(launch_args["log_uri"])

    # Values fixed for a job flow that has not run anything yet.
    described.visibletoallusers.should.equal("false")
    int(described.normalizedinstancehours).should.equal(0)
    described.steps.should.have.length_of(0)
예제 #30
0
  def dumpEMRClusters(self):
    """Dump information about the EMR job flows and clusters.

    Connects with the profile in ``self.botoprfl`` unless its first element
    is ``"default"``, in which case ``boto.connect_emr()`` is called without
    a profile.  The job flows are printed directly and every cluster from
    ``list_clusters()`` is passed to
    ``self.opygenericroutines.prntLogErrWarnInfo`` at ``'info'`` level.

    Any ``Exception`` raised on the way is formatted together with
    ``self.sclsnme`` and handed to ``prntErrWarnInfo``.
    """
    try:
      if self.botoprfl[0] != "default":
        conn = boto.connect_emr(profile_name=self.botoprfl)
      else:
        conn = boto.connect_emr()
      if conn:
        print("\n<Start of EMR clusters>\n")
        print(" Jobflows: %s" % conn.describe_jobflows())
        # Empty 'info' entries frame the cluster listing.
        self.opygenericroutines.prntLogErrWarnInfo('', 'info', bresume=True)
        for c in conn.list_clusters().clusters:
          ec = " %s" % c
          self.opygenericroutines.prntLogErrWarnInfo(str(ec), 'info', bresume=True)
        self.opygenericroutines.prntLogErrWarnInfo('', 'info', bresume=True)
        print("\n<End of EMR clusters>\n")
    # "except X as e" works on Python 2.6+ and Python 3; the older
    # "except X, e" spelling is a syntax error on Python 3.
    except Exception as e:
      serr = ('%s :: dumpEMRClusters(...) : connect_emr,list_clusters(...).clusters, '
              '%s' % (self.sclsnme, str(e)))
      prntErrWarnInfo(serr, bresume=True)
예제 #31
0
파일: test_emr.py 프로젝트: tomviner/moto
def test_describe_jobflows():
    """describe_jobflows lists all flows and can be filtered by id."""
    emr = boto.connect_emr()
    first_id = emr.run_jobflow(**run_jobflow_args)
    second_id = emr.run_jobflow(**run_jobflow_args)

    everything = emr.describe_jobflows()
    everything.should.have.length_of(2)

    # Filtering by id returns just the requested job flow.
    only_second = emr.describe_jobflows(jobflow_ids=[second_id])
    only_second.should.have.length_of(1)
    only_second[0].jobflowid.should.equal(second_id)

    single = emr.describe_jobflow(first_id)
    single.jobflowid.should.equal(first_id)
예제 #32
0
def get_jobflow_status(emr_id):
	"""Get the EMR jobflow state for EMR jobflow id emr_id.

	Returns a ``(status, details, url)`` tuple: the job flow state, its
	``laststatechangereason`` and a ``http://<master public dns>:9100`` URL.
	``details`` and ``url`` are each an empty string when the job flow
	lacks the corresponding attribute.
	"""
	conn = boto.connect_emr()
	try:
		jobflow = conn.describe_jobflow(emr_id)
	finally:
		# Do not leave the connection open once the job flow is fetched.
		conn.close()

	status = jobflow.state

	# The optional attributes are read independently so that a missing
	# master DNS name does not discard an available state-change reason
	# (and vice versa).
	try:
		details = jobflow.laststatechangereason
	except AttributeError:
		details = ""
	try:
		url = "http://%s:9100" % jobflow.masterpublicdnsname
	except AttributeError:
		url = ""

	return status, details, url
예제 #33
0
파일: test_emr.py 프로젝트: rocky4570/moto
def test_tags():
    """Tags can be added to a cluster and removed again by key."""
    wanted_tags = {"tag1": "val1", "tag2": "val2"}

    emr = boto.connect_emr()
    flow_id = emr.run_jobflow(**run_jobflow_args)

    emr.add_tags(flow_id, wanted_tags)
    tagged = emr.describe_cluster(flow_id)
    tagged.tags.should.have.length_of(2)
    reported_tags = {tag.key: tag.value for tag in tagged.tags}
    reported_tags.should.equal(wanted_tags)

    # Removal takes a list of tag keys.
    tag_keys = list(wanted_tags)
    emr.remove_tags(flow_id, tag_keys)
    untagged = emr.describe_cluster(flow_id)
    untagged.tags.should.have.length_of(0)
예제 #34
0
def test_run_jobflow():
    """run_jobflow creates a WAITING job flow that mirrors its arguments."""
    client = boto.connect_emr()
    params = run_jobflow_args.copy()
    new_id = client.run_jobflow(**params)
    flow = client.describe_jobflow(new_id)

    flow.state.should.equal("WAITING")
    flow.jobflowid.should.equal(new_id)
    flow.name.should.equal(params["name"])

    master_type = flow.masterinstancetype
    master_type.should.equal(params["master_instance_type"])
    slave_type = flow.slaveinstancetype
    slave_type.should.equal(params["slave_instance_type"])

    flow.loguri.should.equal(params["log_uri"])
    flow.visibletoallusers.should.equal("false")
    # Instance hours come back as text, hence the int() conversion.
    instance_hours = int(flow.normalizedinstancehours)
    instance_hours.should.equal(0)
    flow.steps.should.have.length_of(0)
예제 #35
0
def test_tags():
    """add_tags / remove_tags round trip on a single cluster."""
    input_tags = {"tag1": "val1", "tag2": "val2"}

    client = boto.connect_emr()
    job_flow_id = client.run_jobflow(**run_jobflow_args)

    def current_tags():
        # Tags as currently reported by describe_cluster.
        return client.describe_cluster(job_flow_id).tags

    client.add_tags(job_flow_id, input_tags)
    after_add = current_tags()
    after_add.should.have.length_of(2)
    dict((tag.key, tag.value) for tag in after_add).should.equal(input_tags)

    client.remove_tags(job_flow_id, list(input_tags.keys()))
    current_tags().should.have.length_of(0)
예제 #36
0
def test_describe_jobflows():
    """describe_jobflows supports id, state and creation-time filters."""
    emr = boto.connect_emr()
    launch_args = run_jobflow_args.copy()
    expected = {}

    def launch(index, state, terminate):
        # Start "cluster<index>", optionally terminate it, and record it.
        name = "cluster" + str(index)
        launch_args["name"] = name
        new_id = emr.run_jobflow(**launch_args)
        if terminate:
            emr.terminate_jobflow(new_id)
        expected[new_id] = {
            "id": new_id,
            "name": name,
            "state": state,
        }

    for index in range(4):
        launch(index, "WAITING", False)

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    cutoff = datetime.now(pytz.utc)
    time.sleep(1)

    for index in range(4, 6):
        launch(index, "TERMINATED", True)

    everything = emr.describe_jobflows()
    everything.should.have.length_of(6)

    for wanted_id in expected:
        by_id = emr.describe_jobflows(jobflow_ids=[wanted_id])
        by_id.should.have.length_of(1)
        by_id[0].jobflowid.should.equal(wanted_id)

    waiting = emr.describe_jobflows(states=["WAITING"])
    waiting.should.have.length_of(4)
    for flow in waiting:
        flow.state.should.equal("WAITING")

    older = emr.describe_jobflows(created_before=cutoff)
    older.should.have.length_of(4)

    newer = emr.describe_jobflows(created_after=cutoff)
    newer.should.have.length_of(2)
예제 #37
0
파일: test_emr.py 프로젝트: zapier/moto
def test_describe_jobflows():
    """Four WAITING and two TERMINATED flows are filtered correctly."""
    client = boto.connect_emr()
    params = run_jobflow_args.copy()
    expected = {}

    # First batch: four job flows that stay in WAITING.
    for number in range(4):
        name = 'cluster' + str(number)
        params['name'] = name
        job_flow_id = client.run_jobflow(**params)
        expected[job_flow_id] = dict(id=job_flow_id, name=name, state='WAITING')

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    split_time = datetime.now(pytz.utc)
    time.sleep(1)

    # Second batch: two job flows that are terminated right away.
    for number in range(4, 6):
        name = 'cluster' + str(number)
        params['name'] = name
        job_flow_id = client.run_jobflow(**params)
        client.terminate_jobflow(job_flow_id)
        expected[job_flow_id] = dict(id=job_flow_id, name=name,
                                     state='TERMINATED')

    all_flows = client.describe_jobflows()
    all_flows.should.have.length_of(6)

    for job_flow_id in expected:
        found = client.describe_jobflows(jobflow_ids=[job_flow_id])
        found.should.have.length_of(1)
        found[0].jobflowid.should.equal(job_flow_id)

    waiting_flows = client.describe_jobflows(states=['WAITING'])
    waiting_flows.should.have.length_of(4)
    for flow in waiting_flows:
        flow.state.should.equal('WAITING')

    created_earlier = client.describe_jobflows(created_before=split_time)
    created_earlier.should.have.length_of(4)

    created_later = client.describe_jobflows(created_after=split_time)
    created_later.should.have.length_of(2)
예제 #38
0
def test_describe_jobflows():
    """Exercise the jobflow_ids, states and created_* filters together."""
    connection = boto.connect_emr()
    run_args = run_jobflow_args.copy()
    expected = {}

    def record(flow_id, flow_name, flow_state):
        # Remember what was launched, keyed by job flow id.
        expected[flow_id] = {
            'id': flow_id,
            'name': flow_name,
            'state': flow_state,
        }

    for position in range(4):
        flow_name = 'cluster' + str(position)
        run_args['name'] = flow_name
        record(connection.run_jobflow(**run_args), flow_name, 'WAITING')

    # need sleep since it appears the timestamp is always rounded to
    # the nearest second internally
    time.sleep(1)
    boundary = datetime.now(pytz.utc)
    time.sleep(1)

    for position in range(4, 6):
        flow_name = 'cluster' + str(position)
        run_args['name'] = flow_name
        flow_id = connection.run_jobflow(**run_args)
        connection.terminate_jobflow(flow_id)
        record(flow_id, flow_name, 'TERMINATED')

    connection.describe_jobflows().should.have.length_of(6)

    for flow_id in expected.keys():
        matches = connection.describe_jobflows(jobflow_ids=[flow_id])
        matches.should.have.length_of(1)
        matches[0].jobflowid.should.equal(flow_id)

    still_waiting = connection.describe_jobflows(states=['WAITING'])
    still_waiting.should.have.length_of(4)
    for entry in still_waiting:
        entry.state.should.equal('WAITING')

    connection.describe_jobflows(
        created_before=boundary).should.have.length_of(4)
    connection.describe_jobflows(
        created_after=boundary).should.have.length_of(2)
예제 #39
0
파일: test_emr.py 프로젝트: rocky4570/moto
def test_run_jobflow_with_instance_groups():
    """Start a job flow with explicit instance groups and verify each reported group."""
    groups_by_name = {}
    for group in input_instance_groups:
        groups_by_name[group.name] = group

    conn = boto.connect_emr()
    job_id = conn.run_jobflow(instance_groups=input_instance_groups, **run_jobflow_args)
    job_flow = conn.describe_jobflow(job_id)

    # The job flow's instance count must be the total over all input groups.
    requested_total = sum(group.num_instances for group in input_instance_groups)
    int(job_flow.instancecount).should.equal(requested_total)

    for reported in job_flow.instancegroups:
        wanted = groups_by_name[reported.name]
        reported.should.have.property("instancegroupid")
        int(reported.instancerunningcount).should.equal(wanted.num_instances)
        reported.instancerole.should.equal(wanted.role)
        reported.instancetype.should.equal(wanted.type)
        reported.market.should.equal(wanted.market)
        # Only compare bid prices for input groups that carry one.
        if hasattr(wanted, "bidprice"):
            reported.bidprice.should.equal(wanted.bidprice)
예제 #40
0
파일: test_emr.py 프로젝트: rocky4570/moto
def test_set_visible_to_all_users():
    """Toggle a job flow's visible-to-all-users flag and check the reported value each time."""
    conn = boto.connect_emr()
    launch_args = dict(run_jobflow_args, visible_to_all_users=False)
    job_id = conn.run_jobflow(**launch_args)
    conn.describe_jobflow(job_id).visibletoallusers.should.equal("false")

    # Switch the flag on and then off again, re-reading the job flow each time.
    for flag, reported in ((True, "true"), (False, "false")):
        conn.set_visible_to_all_users(job_id, flag)
        conn.describe_jobflow(job_id).visibletoallusers.should.equal(reported)
예제 #41
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_list_clusters():
    """Start one job flow and check the single cluster summary from list_clusters."""
    conn = boto.connect_emr()
    launch_kwargs = dict(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        steps=[],
    )
    conn.run_jobflow(**launch_kwargs)

    listed = conn.list_clusters().clusters
    listed.should.have.length_of(1)

    only_cluster = listed[0]
    only_cluster.name.should.equal("My jobflow")
    # The instance-hours value is compared as the string '0'.
    only_cluster.normalizedinstancehours.should.equal('0')
    only_cluster.status.state.should.equal("RUNNING")
예제 #42
0
def test_list_clusters():
    """Verify that list_clusters reports exactly the one job flow that was started."""
    emr = boto.connect_emr()
    emr.run_jobflow(name='My jobflow',
                    log_uri='s3://some_bucket/jobflow_logs',
                    steps=[])

    found = emr.list_clusters().clusters
    found.should.have.length_of(1)

    first = found[0]
    first.name.should.equal("My jobflow")
    first.normalizedinstancehours.should.equal('0')
    first.status.state.should.equal("RUNNING")
예제 #43
0
def create_job_flow(steps, job):
	"""Start an EMR job, reusing a waiting job flow when one is large enough.

	The first job flow in the WAITING state whose instance count is at
	least ``job.nodes`` gets ``steps`` added to it. If there is none, a new
	keep-alive job flow is started with ``job.nodes`` instances of type
	``job.node_size``.

	Args:
		steps: EMR steps to run.
		job: object providing ``nodes`` and ``node_size``.

	Returns:
		The id of the job flow that will run the steps.
	"""
	conn = boto.connect_emr()
	# try/finally so the connection is closed even when an EMR call raises.
	try:
		for jf in conn.describe_jobflows(['WAITING']):
			if int(jf.instancecount) >= int(job.nodes):
				conn.add_jobflow_steps(jf.jobflowid, steps)
				return jf.jobflowid

		# No suitable waiting job flow: start a fresh one.
		return conn.run_jobflow(
			"nsr web jobflow",
			log_uri="s3n://nsr-logs",
			master_instance_type=str(job.node_size),
			slave_instance_type=str(job.node_size),
			num_instances=job.nodes,
			action_on_failure="CONTINUE",
			steps=steps,
			keep_alive=True,
		)
	finally:
		conn.close()
예제 #44
0
def test_set_visible_to_all_users():
    """Start a non-visible job flow, then flip its visibility on and off again."""
    conn = boto.connect_emr()
    launch_args = run_jobflow_args.copy()
    launch_args.update(visible_to_all_users=False)
    job_id = conn.run_jobflow(**launch_args)

    def reported_visibility():
        # Re-read the job flow so each check sees the current value.
        return conn.describe_jobflow(job_id).visibletoallusers

    reported_visibility().should.equal("false")

    conn.set_visible_to_all_users(job_id, True)
    reported_visibility().should.equal("true")

    conn.set_visible_to_all_users(job_id, False)
    reported_visibility().should.equal("false")
예제 #45
0
파일: test_emr.py 프로젝트: tomviner/moto
def test_run_jobflow_with_instance_groups():
    """Run a job flow with instance groups and compare the reported groups to the input."""
    by_name = {group.name: group for group in input_instance_groups}
    conn = boto.connect_emr()
    launch_args = dict(run_jobflow_args, instance_groups=input_instance_groups)
    job_flow = conn.describe_jobflow(conn.run_jobflow(**launch_args))

    total_instances = sum(group.num_instances for group in input_instance_groups)
    int(job_flow.instancecount).should.equal(total_instances)

    for actual in job_flow.instancegroups:
        source = by_name[actual.name]
        actual.should.have.property('instancegroupid')
        int(actual.instancerunningcount).should.equal(source.num_instances)
        actual.instancerole.should.equal(source.role)
        actual.instancetype.should.equal(source.type)
        actual.market.should.equal(source.market)
        # Input groups without a bidprice attribute are not compared on it.
        if hasattr(source, 'bidprice'):
            actual.bidprice.should.equal(source.bidprice)
예제 #46
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_create_job_flow_with_instance_groups():
    """Start a job flow with two six-instance spot task groups and check the counts."""
    conn = boto.connect_emr()

    # Two separate but identically configured instance groups.
    instance_groups = [
        InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07')
        for _ in range(2)
    ]
    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[],
                              instance_groups=instance_groups)

    job_flow = conn.describe_jobflow(job_id)
    int(job_flow.instancecount).should.equal(12)
    first_group = job_flow.instancegroups[0]
    int(first_group.instancerunningcount).should.equal(6)
예제 #47
0
def test_describe_job_flows():
    """Start two job flows and look them up collectively, by id filter and individually."""
    conn = boto.connect_emr()
    launch_kwargs = dict(name='My jobflow',
                         log_uri='s3://some_bucket/jobflow_logs',
                         steps=[])
    job1_id = conn.run_jobflow(**launch_kwargs)
    job2_id = conn.run_jobflow(**launch_kwargs)

    conn.describe_jobflows().should.have.length_of(2)

    # Filtering by id must return only the requested job flow.
    filtered = conn.describe_jobflows(jobflow_ids=[job2_id])
    filtered.should.have.length_of(1)
    filtered[0].jobflowid.should.equal(job2_id)

    conn.describe_jobflow(job1_id).jobflowid.should.equal(job1_id)
예제 #48
0
def cancel_job(job):
	"""Cancel a job by terminating the EMR job flow or killing the single machine process.

	When ``job.job_type`` is ``'e'`` the job flow ``job.jobflowid`` is
	terminated through boto. Otherwise the function connects over SSH to a
	fixed host, runs ``pkill -f`` against the base name of the job's input
	file, and writes ``CANCELLED`` to that job's status log on the host.
	"""
	if job.job_type == 'e':
		c = boto.connect_emr()
		c.terminate_jobflow(job.jobflowid)
	else:
		# Only the last path component of the input file name is used.
		filename = job.get_input_file().name.split('/')[-1]		
		# NOTE(review): filename is interpolated unquoted into shell commands
		# below -- confirm it can never contain shell metacharacters.
		kill_cmd = "pkill -f %s" % filename

		client = paramiko.SSHClient()
		client.load_host_keys('/var/www/known_hosts')
		client.connect('10.203.87.100', 22, 'ec2-user', key_filename='/var/www/nsr-dev.pem')
		# The returned streams are not read; nothing here waits for the
		# command's exit status.
		stdin, stdout, stderr = client.exec_command(kill_cmd)
		# for line in stdout:
		# 	print line
		# for line in stderr:
		# 	print line
		# Record the cancellation in the job's status log on the remote host.
		client.exec_command("echo CANCELLED > ~/status-output/status-s-%s.log" % filename)		
예제 #49
0
def get_step_status(emrid):
	"""Get percentage complete of the EMR job with jobflow id emrid.

	This screen scrapes the EMR tracker page, which is available at the
	job's masterpublicdnsname on port 9100. Access to this page is limited
	to whitelisted IPs, which can be set in the AWS Security Group settings
	page.

	Args:
		emrid: id of the job flow to inspect.

	Returns:
		The percentages found on the tracker's status line, summed, divided
		by 200 and multiplied by 90.
	"""
	emr = boto.connect_emr()
	# Close the EMR connection even if describe_jobflow raises.
	try:
		url = emr.describe_jobflow(emrid).masterpublicdnsname
	finally:
		emr.close()

	c = httplib.HTTPConnection(url, 9100)
	# Close the HTTP connection once the page has been read (or on failure).
	try:
		c.request("GET", "/jobtracker.jsp")
		response = c.getresponse().read().split("\n")
	finally:
		c.close()

	# NOTE(review): the status row is assumed to be line index 36 of the
	# page -- confirm against the tracker's current HTML.
	status_line = response[36]

	statuses = map(float, re.findall("<td>([0-9.]*)%<table", status_line))
	# NOTE(review): presumably two percentages (200 in total) scaled so the
	# result tops out at 90 -- confirm with the caller.
	return sum(statuses)/200. * 90
예제 #50
0
파일: test_emr.py 프로젝트: crosswise/moto
def test_set_termination_protection():
    """Check the reported termination-protection value before and after toggling it."""
    conn = boto.connect_emr()

    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[])

    # Before the flag is ever set, the value is reported as the string 'None'.
    conn.describe_jobflow(job_id).terminationprotected.should.equal(u'None')

    for protect, reported in ((True, 'true'), (False, 'false')):
        conn.set_termination_protection(job_id, protect)
        conn.describe_jobflow(job_id).terminationprotected.should.equal(reported)
예제 #51
0
파일: test_emr.py 프로젝트: joshp123/moto
def test_modify_instance_groups():
    """Add two task instance groups to a job flow, resize them and check the counts."""
    conn = boto.connect_emr()

    wordcount_step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output')

    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              steps=[wordcount_step])

    new_groups = [
        InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07'),
        InstanceGroup(6, 'TASK', 'c1.medium', 'SPOT', 'spot-0.07', '0.07'),
    ]
    added = conn.add_instance_groups(job_id, new_groups)
    # The ids of the added groups come back as one comma-separated string.
    instance_group_ids = added.instancegroupids.split(",")

    def group_by_id(flow, group_id):
        # First group of the job flow carrying the given instance group id.
        return [
            candidate for candidate in flow.instancegroups
            if candidate.instancegroupid == group_id
        ][0]

    job_flow = conn.describe_jobflows()[0]
    int(job_flow.instancecount).should.equal(12)
    int(job_flow.instancegroups[0].instancerunningcount).should.equal(6)

    conn.modify_instance_groups(instance_group_ids, [2, 3])

    job_flow = conn.describe_jobflows()[0]
    int(job_flow.instancecount).should.equal(5)
    resized_first = group_by_id(job_flow, instance_group_ids[0])
    int(resized_first.instancerunningcount).should.equal(2)
    resized_second = group_by_id(job_flow, instance_group_ids[1])
    int(resized_second.instancerunningcount).should.equal(3)
예제 #52
0
파일: test_emr.py 프로젝트: rocky4570/moto
def test_describe_jobflows():
    """Check describe_jobflows filters against 400 waiting and 200 terminated job flows."""
    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    expected = {}

    def start_cluster(number, final_state):
        # Launch one job flow named after its index and record what to expect.
        args["name"] = "cluster" + str(number)
        new_id = conn.run_jobflow(**args)
        expected[new_id] = {"id": new_id, "name": args["name"], "state": final_state}
        return new_id

    for number in range(400):
        start_cluster(number, "WAITING")

    # Creation timestamps appear to be rounded to whole seconds internally,
    # so keep a one-second gap on both sides of the reference timestamp.
    time.sleep(1)
    timestamp = datetime.now(pytz.utc)
    time.sleep(1)

    for number in range(400, 600):
        conn.terminate_jobflow(start_cluster(number, "TERMINATED"))

    # Only 512 of the 600 job flows are expected from an unfiltered call.
    conn.describe_jobflows().should.have.length_of(512)

    for cluster_id in expected:
        single = conn.describe_jobflows(jobflow_ids=[cluster_id])
        single.should.have.length_of(1)
        single[0].jobflowid.should.equal(cluster_id)

    waiting = conn.describe_jobflows(states=["WAITING"])
    waiting.should.have.length_of(400)
    for flow in waiting:
        flow.state.should.equal("WAITING")

    conn.describe_jobflows(created_before=timestamp).should.have.length_of(400)
    conn.describe_jobflows(created_after=timestamp).should.have.length_of(200)
예제 #53
0
파일: test_emr.py 프로젝트: invenia/moto
def test_set_visible_to_all_users():
    """Start a job flow hidden from other users, then toggle its visibility twice."""
    conn = boto.connect_emr()

    job_id = conn.run_jobflow(name='My jobflow',
                              log_uri='s3://some_bucket/jobflow_logs',
                              job_flow_role='some-role-arn',
                              steps=[],
                              visible_to_all_users=False)
    conn.describe_jobflow(job_id).visibletoallusers.should.equal('False')

    # The reported value is compared against the capitalised strings
    # 'True' / 'False'.
    for flag, reported in ((True, 'True'), (False, 'False')):
        conn.set_visible_to_all_users(job_id, flag)
        conn.describe_jobflow(job_id).visibletoallusers.should.equal(reported)
예제 #54
0
파일: test_emr.py 프로젝트: tomviner/moto
def test_list_clusters():
    """List one waiting and one terminated cluster and verify each summary."""
    conn = boto.connect_emr()

    launch_args = run_jobflow_args.copy()
    launch_args['name'] = 'jobflow1'
    cluster1_id = conn.run_jobflow(**launch_args)
    launch_args['name'] = 'jobflow2'
    cluster2_id = conn.run_jobflow(**launch_args)
    conn.terminate_jobflow(cluster2_id)

    clusters = conn.list_clusters().clusters
    clusters.should.have.length_of(2)

    expected = {
        cluster1_id: dict(id=cluster1_id,
                          name='jobflow1',
                          normalizedinstancehours=0,
                          state='WAITING'),
        cluster2_id: dict(id=cluster2_id,
                          name='jobflow2',
                          normalizedinstancehours=0,
                          state='TERMINATED'),
    }

    for summary in clusters:
        wanted = expected[summary.id]
        summary.id.should.equal(wanted['id'])
        summary.name.should.equal(wanted['name'])
        int(summary.normalizedinstancehours).should.equal(
            wanted['normalizedinstancehours'])
        summary.status.state.should.equal(wanted['state'])

        timeline = summary.status.timeline
        timeline.creationdatetime.should.be.a(six.string_types)
        # An end time is expected only on the terminated cluster.
        if wanted['state'] != 'TERMINATED':
            timeline.shouldnt.have.property('enddatetime')
        else:
            timeline.enddatetime.should.be.a(six.string_types)
        timeline.readydatetime.should.be.a(six.string_types)
예제 #55
0
def test_cluster_tagging():
    """Add two tags to a cluster, remove one and check what describe_cluster reports."""
    conn = boto.connect_emr()
    # The job flow id returned by run_jobflow is used as the cluster id.
    cluster_id = conn.run_jobflow(name='My jobflow',
                                  log_uri='s3://some_bucket/jobflow_logs',
                                  steps=[])
    conn.add_tags(cluster_id, {"tag1": "val1", "tag2": "val2"})

    described = conn.describe_cluster(cluster_id)
    described.tags.should.have.length_of(2)
    tag_map = {tag.key: tag.value for tag in described.tags}
    tag_map['tag1'].should.equal('val1')
    tag_map['tag2'].should.equal('val2')

    # Remove a tag
    conn.remove_tags(cluster_id, ["tag1"])
    described = conn.describe_cluster(cluster_id)
    described.tags.should.have.length_of(1)
    tag_map = {tag.key: tag.value for tag in described.tags}
    tag_map['tag2'].should.equal('val2')
예제 #56
0
	# Tail of an option-handling block; `o` and `a` are presumably the current
	# option name and its argument from a loop above this fragment.
	# NOTE(review): ('--spot-bid') and ('--test') are parenthesized strings, not
	# one-element tuples, so these `in` checks are substring tests on `o`.
	if o in ('--spot-bid'):
		params['spot_bid_price']=a
	if o in ('--test'):
		params['test_mode']=True
	
# Parameters that must be present with a truthy value in params.
required = ['aws_key','secret','keypair']

# Report each missing required parameter and call usage() for it.
for pname in required:
    if not params.get(pname, None):
        print '\nERROR:%s is required' % pname
        usage()

# Echo every parameter name and value (repr of each) before launching.
# NOTE(review): this also prints the AWS key and secret held in params.
for p, v in params.iteritems():
	print "param:" + `p`+ " value:" + `v`

conn = boto.connect_emr(params['aws_key'],params['secret'])

# Bootstrap actions: install script (receives the AWS key and secret as
# arguments), Hadoop configuration, and jobtracker heap size.
bootstrap_step1 = BootstrapAction("install_cc", "s3://commoncrawl-public/config64.sh",[params['aws_key'], params['secret']])
bootstrap_step2 = BootstrapAction("configure_hadoop", "s3://elasticmapreduce/bootstrap-actions/configure-hadoop",
	[
	"-m","mapred.tasktracker.map.tasks.maximum=8",
	"-m","mapred.child.java.opts=-XX:ErrorFile=/tmp/hs_err_${mapred.tip.id}.log -Xmx700m -XX:+UseParNewGC -XX:ParallelGCThreads=8 -XX:NewSize=100m -XX:+UseConcMarkSweepGC -XX:+UseTLAB -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycleMin=0 -XX:CMSIncrementalDutyCycle=10"
	])
bootstrap_step3 = BootstrapAction("configure_jobtrackerheap", "s3://elasticmapreduce/bootstrap-actions/configure-daemons",["--jobtracker-heap-size=12096"])

# One on-demand master instance plus params['num_core'] on-demand core instances.
namenode_instance_group = InstanceGroup(1,"MASTER","c1.xlarge","ON_DEMAND","MASTER_GROUP")
core_instance_group = InstanceGroup(params['num_core'],"CORE","c1.xlarge","ON_DEMAND","CORE_GROUP")

# Without spot instances, the cluster is just the master and core groups.
instance_groups=[]
if params['num_spot'] <= 0:
	instance_groups=[namenode_instance_group,core_instance_group]
예제 #57
0
파일: test_emr.py 프로젝트: joshp123/moto
def test_create_job_flow():
    """Start a job flow with two streaming steps and verify the described job flow and steps."""
    conn = boto.connect_emr()

    first_step = StreamingStep(
        name='My wordcount example',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input',
        output='s3n://output_bucket/output/wordcount_output')
    second_step = StreamingStep(
        name='My wordcount example2',
        mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
        reducer='aggregate',
        input='s3n://elasticmapreduce/samples/wordcount/input2',
        output='s3n://output_bucket/output/wordcount_output2')

    # Argument lists each step is expected to report, in order.
    first_step_args = [
        '-mapper',
        's3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
        '-reducer',
        'aggregate',
        '-input',
        's3n://elasticmapreduce/samples/wordcount/input',
        '-output',
        's3n://output_bucket/output/wordcount_output',
    ]
    second_step_args = [
        '-mapper',
        's3n://elasticmapreduce/samples/wordcount/wordSplitter2.py',
        '-reducer',
        'aggregate',
        '-input',
        's3n://elasticmapreduce/samples/wordcount/input2',
        '-output',
        's3n://output_bucket/output/wordcount_output2',
    ]

    job_id = conn.run_jobflow(
        name='My jobflow',
        log_uri='s3://some_bucket/jobflow_logs',
        master_instance_type='m1.medium',
        slave_instance_type='m1.small',
        steps=[first_step, second_step],
    )

    job_flow = conn.describe_jobflow(job_id)
    job_flow.state.should.equal('STARTING')
    job_flow.jobflowid.should.equal(job_id)
    job_flow.name.should.equal('My jobflow')
    job_flow.masterinstancetype.should.equal('m1.medium')
    job_flow.slaveinstancetype.should.equal('m1.small')
    job_flow.loguri.should.equal('s3://some_bucket/jobflow_logs')
    job_flow.visibletoallusers.should.equal('False')
    int(job_flow.normalizedinstancehours).should.equal(0)

    # The first step is expected to be starting while the second is pending.
    described_first = job_flow.steps[0]
    described_first.name.should.equal('My wordcount example')
    described_first.state.should.equal('STARTING')
    [arg.value for arg in described_first.args].should.equal(first_step_args)

    described_second = job_flow.steps[1]
    described_second.name.should.equal('My wordcount example2')
    described_second.state.should.equal('PENDING')
    [arg.value for arg in described_second.args].should.equal(second_step_args)
예제 #58
0
def test_instance_groups():
    """Verify instance groups via describe_jobflow and list_instance_groups, then resize two.

    The job flow starts with the first two input groups; the remaining
    groups are added afterwards, and finally "task-1" and "task-2" are
    resized to 2 and 3 instances.
    """
    groups_by_name = {group.name: group for group in input_instance_groups}
    total_requested = sum(group.num_instances for group in input_instance_groups)

    conn = boto.connect_emr()
    args = run_jobflow_args.copy()
    # Presumably dropped because explicit instance groups are passed instead.
    for instance_key in ("master_instance_type", "slave_instance_type",
                         "num_instances"):
        del args[instance_key]
    args["instance_groups"] = input_instance_groups[:2]
    job_id = conn.run_jobflow(**args)

    base_instance_count = int(conn.describe_jobflow(job_id).instancecount)

    conn.add_instance_groups(job_id, input_instance_groups[2:])

    jf = conn.describe_jobflow(job_id)
    int(jf.instancecount).should.equal(total_requested)
    for reported in jf.instancegroups:
        wanted = groups_by_name[reported.name]
        if hasattr(wanted, "bidprice"):
            reported.bidprice.should.equal(wanted.bidprice)
        reported.creationdatetime.should.be.a(str)
        # reported.enddatetime.should.be.a(str)
        reported.should.have.property("instancegroupid")
        int(reported.instancerequestcount).should.equal(wanted.num_instances)
        reported.instancerole.should.equal(wanted.role)
        int(reported.instancerunningcount).should.equal(wanted.num_instances)
        reported.instancetype.should.equal(wanted.type)
        reported.laststatechangereason.should.be.a(str)
        reported.market.should.equal(wanted.market)
        reported.name.should.be.a(str)
        reported.readydatetime.should.be.a(str)
        reported.startdatetime.should.be.a(str)
        reported.state.should.equal("RUNNING")

    # The same groups as reported by list_instance_groups, which uses
    # different attribute names.
    for listed in conn.list_instance_groups(job_id).instancegroups:
        wanted = groups_by_name[listed.name]
        if hasattr(wanted, "bidprice"):
            listed.bidprice.should.equal(wanted.bidprice)
        # Not checked: Configurations, EbsBlockDevices, EbsOptimized
        listed.should.have.property("id")
        listed.instancegrouptype.should.equal(wanted.role)
        listed.instancetype.should.equal(wanted.type)
        listed.market.should.equal(wanted.market)
        listed.name.should.equal(wanted.name)
        int(listed.requestedinstancecount).should.equal(wanted.num_instances)
        int(listed.runninginstancecount).should.equal(wanted.num_instances)
        # Not checked: ShrinkPolicy
        status = listed.status
        status.state.should.equal("RUNNING")
        status.statechangereason.code.should.be.a(str)
        status.statechangereason.message.should.be.a(str)
        status.timeline.creationdatetime.should.be.a(str)
        # status.timeline.enddatetime.should.be.a(str)
        status.timeline.readydatetime.should.be.a(str)

    igs = {group.name: group for group in jf.instancegroups}
    resize_ids = [igs["task-1"].instancegroupid, igs["task-2"].instancegroupid]
    conn.modify_instance_groups(resize_ids, [2, 3])

    jf = conn.describe_jobflow(job_id)
    int(jf.instancecount).should.equal(base_instance_count + 5)
    igs = {group.name: group for group in jf.instancegroups}
    int(igs["task-1"].instancerunningcount).should.equal(2)
    int(igs["task-2"].instancerunningcount).should.equal(3)
예제 #59
0
import datetime
import os

import boto
from boto.emr.instance_group import InstanceGroup
from boto.emr.step import InstallPigStep, PigStep


conn = boto.connect_emr()

# One master and two core m1.small spot instances, each with a '0.10' bid.
# NOTE(review): the '[email protected]' group names look like e-mail
# obfuscation residue from wherever this snippet was copied -- restore the
# intended names.
instance_groups = [
    InstanceGroup(1, 'MASTER', 'm1.small', 'SPOT', '[email protected]', '0.10'),
    InstanceGroup(2, 'CORE', 'm1.small', 'SPOT', '[email protected]', '0.10'),
]

# Pig script and input location, plus a per-run output location.
pig_file = 's3://elasticmapreduce/samples/pig-apache/do-reports2.pig'
INPUT = 's3://elasticmapreduce/samples/pig-apache/input/'
# NOTE(review): "%s" is not among Python's documented strftime directives;
# presumably it is meant to yield epoch seconds, which is platform-dependent.
OUTPUT = ('s3://org.unencrypted.emr.output/apache_sample/%s' %
          datetime.datetime.utcnow().strftime("%s"))

# Print the settings for this run.
# NOTE(review): the banner spells the last label "OUPUT".
print """\
Running pig job with settings:

    SCRIPT={script}
    INPUT={input}
    OUPUT={output}
""".format(script=pig_file, input=INPUT, output=OUTPUT)

# INPUT and OUTPUT are handed over as two '-p NAME=value' argument pairs.
pig_args = ['-p', 'INPUT=%s' % INPUT,
            '-p', 'OUTPUT=%s' % OUTPUT]
예제 #60
0
def test_steps():
    """Add streaming steps to a job flow and verify them through three APIs.

    The steps are checked via describe_jobflow, list_steps and
    describe_step, followed by a list_steps state-filter check that is
    guarded by a boto version requirement.
    """
    streaming_jar = "/home/hadoop/contrib/streaming/hadoop-streaming.jar"
    active_states = ["RUNNING", "PENDING"]
    input_steps = [
        StreamingStep(
            name="My wordcount example",
            mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter.py",
            reducer="aggregate",
            input="s3n://elasticmapreduce/samples/wordcount/input",
            output="s3n://output_bucket/output/wordcount_output",
        ),
        StreamingStep(
            name="My wordcount example & co.",
            mapper="s3n://elasticmapreduce/samples/wordcount/wordSplitter2.py",
            reducer="aggregate",
            input="s3n://elasticmapreduce/samples/wordcount/input2",
            output="s3n://output_bucket/output/wordcount_output2",
        ),
    ]

    def verify_step(actual, spec):
        # Shared checks for a step as returned by list_steps / describe_step.
        # Not checked: actiononfailure, properties, statechangereason.
        list(arg.value for arg in actual.config.args).should.equal([
            "-mapper",
            spec.mapper,
            "-reducer",
            spec.reducer,
            "-input",
            spec.input,
            "-output",
            spec.output,
        ])
        actual.config.jar.should.equal(streaming_jar)
        actual.config.mainclass.should.equal("")
        actual.should.have.property("id").should.be.a(str)
        actual.name.should.equal(spec.name)
        actual.status.state.should.be.within(active_states)
        actual.status.timeline.creationdatetime.should.be.a(str)
        # actual.status.timeline.enddatetime.should.be.a(str)
        # actual.status.timeline.startdatetime.should.be.a(str)

    # TODO: implementation and test for cancel_steps

    conn = boto.connect_emr()
    cluster_id = conn.run_jobflow(steps=[input_steps[0]], **run_jobflow_args)

    conn.describe_jobflow(cluster_id).steps.should.have.length_of(1)

    conn.add_jobflow_steps(cluster_id, [input_steps[1]])

    jf = conn.describe_jobflow(cluster_id)
    jf.steps.should.have.length_of(2)
    for described in jf.steps:
        described.actiononfailure.should.equal("TERMINATE_JOB_FLOW")
        list(arg.value for arg in described.args).should.have.length_of(8)
        described.creationdatetime.should.be.a(str)
        # described.enddatetime.should.be.a(str)
        described.jar.should.equal(streaming_jar)
        described.laststatechangereason.should.be.a(str)
        described.mainclass.should.equal("")
        described.name.should.be.a(str)
        # described.readydatetime.should.be.a(str)
        # described.startdatetime.should.be.a(str)
        described.state.should.be.within(active_states)

    expected = {spec.name: spec for spec in input_steps}

    # Each listed step must match its input, and so must the same step
    # fetched again individually through describe_step.
    for listed in conn.list_steps(cluster_id).steps:
        spec = expected[listed.name]
        verify_step(listed, spec)
        verify_step(conn.describe_step(cluster_id, listed.id), spec)

    @requires_boto_gte("2.39")
    def test_list_steps_with_states():
        # boto's list_steps prior to 2.39 has a bug that ignores
        # step_states argument.
        first_id = conn.list_steps(cluster_id).steps[0].id
        running = conn.list_steps(cluster_id, step_states=["RUNNING"]).steps
        running.should.have.length_of(1)
        running[0].id.should.equal(first_id)

    test_list_steps_with_states()