Example #1
    def __init__(self,
                 team_id,
                 access_key,
                 secret_key,
                 bucket='cs144students'):
        """
        (constructor)

        Creates a new instance of the Rankmaniac class for a specific
        team using the provided credentials.

        Arguments:
            team_id       <str>     the team identifier, which may
                                    differ slightly from the actual
                                    team name.

            access_key    <str>     the AWS access key identifier.
            secret_key    <str>     the AWS secret access key.

        Keyword arguments:
            bucket        <str>     the S3 bucket name.
        """

        region = RegionInfo(None, self.DefaultRegionName,
                            self.DefaultRegionEndpoint)

        self._s3_bucket = bucket
        self._s3_conn = S3Connection(access_key, secret_key)
        self._emr_conn = EmrConnection(access_key, secret_key, region=region)

        self.team_id = team_id
        self.job_id = None

        self._reset()
        self._num_instances = 1
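
A minimal usage sketch for this constructor; the team ID and keys below are placeholders, and it assumes DefaultRegionName and DefaultRegionEndpoint are class attributes defined elsewhere on Rankmaniac:

# Hypothetical values; Rankmaniac is assumed importable from this project.
rm = Rankmaniac('team-42',                  # team identifier
                '<aws-access-key-id>',      # placeholder credentials
                '<aws-secret-access-key>')
print rm.team_id    # 'team-42'
print rm.job_id     # None until a job flow is started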
Example #2
def get_job_flow_objects(conf_path, max_days_ago=None, now=None):
    """Get relevant job flow information from EMR.

    Args:
        conf_path: a string giving an alternate path to the
            configuration file, or None to use the default.

        max_days_ago: a float; if set, don't fetch job flows created
            more than this many days ago.

        now: the current UTC time as a datetime.datetime object;
            defaults to the current time.
    Returns:
        job_flows: A list of boto job flow objects.
    """
    if now is None:
        now = datetime.datetime.utcnow()
    emr_conn = EmrConnection()
    # if --max-days-ago is set, only look at recent jobs
    created_after = None
    if max_days_ago is not None:
        created_after = now - datetime.timedelta(days=max_days_ago)

    return describe_all_job_flows(emr_conn, created_after=created_after)
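
A hedged usage sketch; it assumes boto can find credentials in its environment (EmrConnection() above is built with no arguments) and that describe_all_job_flows is the mrjob-style helper imported elsewhere in this module:

# Fetch job flows created within the last week; conf_path=None uses defaults.
job_flows = get_job_flow_objects(None, max_days_ago=7)
for jf in job_flows:
    print jf.jobflowid, jf.state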
Example #3
def create_emr(R):
    if not boto.config.has_section('Boto'):
        boto.config.add_section('Boto')
    boto.config.set('Boto', 'https_validate_certificates', 'False')
    step = StreamingStep(name='MC_Method example',
                         cache_files=['s3n://bucket774/map.py#map.py'],
                         mapper='map.py',
                         input='s3://bucket774/input/',
                         output='s3://bucket774/output/')
    conn = EmrConnection(access_id, access_key)
    instance_groups = []
    instance_groups.append(
        InstanceGroup(num_instances=1,
                      role="MASTER",
                      type='m4.large',
                      market="ON_DEMAND",
                      name="Master nodes"))
    if R > 1:
        instance_groups.append(
            InstanceGroup(num_instances=R - 1,
                          role="CORE",
                          type='m4.large',
                          market="ON_DEMAND",
                          name="Slave nodes"))
    cluster_id = conn.run_jobflow(name='test MC_method run',
                                  instance_groups=instance_groups,
                                  enable_debugging=False,
                                  steps=[step],
                                  visible_to_all_users=True,
                                  keep_alive=True,
                                  job_flow_role="EMR_EC2_DefaultRole",
                                  service_role="EMR_DefaultRole",
                                  hadoop_version='2.4.0',
                                  log_uri='s3://bucket774/log')
    return cluster_id, conn
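
A usage sketch under the same assumptions the function already makes (module-level access_id/access_key and the bucket774 S3 paths); since R counts total instances, R=3 gives one master and two core nodes:

cluster_id, conn = create_emr(3)
# One-off status check; see Example #16 for a full polling loop.
state = conn.describe_cluster(cluster_id).status.state
print "Cluster {0} is {1}".format(cluster_id, state)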
Example #4
    def __init__(self, prop):
        '''Constructor, initialize EMR connection.'''
        self.prop = prop
        self.conn = EmrConnection(self.prop.ec2.key, self.prop.ec2.secret)
        self.jobid = None
        self.retry = 0
        self.level = 0
        self.last_update = -1
Example #5
    def __init__(self, spec_filename="spec.json"):
        import boto
        from boto.emr.connection import EmrConnection, RegionInfo

        super(EmrRuntime, self).__init__(spec_filename)
        p = self.settings.Param
        self.s3_conn = boto.connect_s3(p.AWS_ACCESS_KEY_ID, p.AWS_ACCESS_KEY_SECRET)
        self.s3_bucket = self.s3_conn.get_bucket(p.S3_BUCKET)
        self.region = p.AWS_Region
        self.emr_conn = EmrConnection(p.AWS_ACCESS_KEY_ID, p.AWS_ACCESS_KEY_SECRET,
                                      region=RegionInfo(name=self.region,
                                                        endpoint=self.region + '.elasticmapreduce.amazonaws.com'))
        self.job_flow_id = p.EMR_jobFlowId
Example #6
    def __init__(self, team_id, access_key, secret_key):
        '''Rankmaniac class constructor

        Creates a new instance of the Rankmaniac Wrapper for a specific
        team.

        Arguments:
            team_id         string      the team ID.
            access_key      string      AWS access key.
            secret_key      string      AWS secret key.
        '''

        self.s3_bucket = 'cs144caltech'

        self.team_id = team_id
        self.emr_conn = EmrConnection(access_key, secret_key)
        self.s3_conn = S3Connection(access_key, secret_key)
        self.job_id = None
Example #7
def get_internal_ips_from_emr(cluster_id, cr):
    """
    Retrieves a list of internal IP addresses for a given EMR cluster
    """

    #  Open connection to EMR
    conn = EmrConnection(
        cr.get_config("aws_access_key"),
        cr.get_config("aws_secret_key"),
        region=RegionInfo(name=cr.get_config("aws_region"),
                          endpoint=cr.get_config("aws_region") +
                          ".elasticmapreduce.amazonaws.com"))

    #  Build list of internal ips from list_instances EMR API
    emr_internal_ips = []
    emr_instances = conn.list_instances(cluster_id).instances
    for instance in emr_instances:
        emr_internal_ips.append(instance.privateipaddress)

    return emr_internal_ips
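
A sketch with a minimal stand-in for the config reader; the real cr object only needs a get_config(key) method, and the cluster ID below is a placeholder:

class DictConfigReader(object):
    """Hypothetical dict-backed config reader."""
    def __init__(self, conf):
        self._conf = conf

    def get_config(self, key):
        return self._conf[key]

cr = DictConfigReader({'aws_access_key': '<aws-access-key-id>',
                       'aws_secret_key': '<aws-secret-access-key>',
                       'aws_region': 'us-west-1'})
print get_internal_ips_from_emr('j-XXXXXXXXXXXXX', cr)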
Example #8
    def __init__(self, parameters):
        try: 
            self.region_name = parameters["region_name"]
            self.access_key = parameters["access_key"]
            self.secret_key = parameters["secret_key"]
            self.ec2_keypair_name = parameters["ec2_keypair_name"]
            self.base_bucket = parameters["base_bucket"]
            self.log_dir = parameters["log_dir"]
            self.emr_status_wait = parameters["emr_status_wait"]
            self.step_status_wait = parameters["step_status_wait"]
            self.emr_cluster_name = parameters["emr_cluster_name"]
        except KeyError as e:
            logging.error("Missing parameter %s while initializing EmrManager", e)
            sys.exit(1)

        # Establishing EmrConnection
        self.connection = EmrConnection(self.access_key, self.secret_key,
                             region=RegionInfo(name=self.region_name,
                             endpoint=self.region_name + '.elasticmapreduce.amazonaws.com'))

        self.log_bucket_name = self.base_bucket + self.log_dir
Example #9
    def __init__(self,
                 region_name='us-east-1',
                 aws_access_key_id=None,
                 aws_secret_access_key=None):

        # If the access key is not specified, get it from the luigi configuration
        if not aws_access_key_id:
            aws_access_key_id = luigi.configuration.get_config().get(
                'aws', 'aws_access_key_id')

        if not aws_secret_access_key:
            aws_secret_access_key = luigi.configuration.get_config().get(
                'aws', 'aws_secret_access_key')

        # Create the region in which to run
        region_endpoint = u'elasticmapreduce.%s.amazonaws.com' % (region_name)
        region = RegionInfo(name=region_name, endpoint=region_endpoint)

        self.emr_connection = EmrConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region=region)
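
For comparison, boto also ships a regional helper that builds the RegionInfo internally; a hedged equivalent of the connection above:

import boto.emr

# Same connection via boto's helper; endpoint construction is handled for us.
emr_connection = boto.emr.connect_to_region(
    region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key)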
Example #10
    def run(self):
        """Run the Hive job on EMR cluster
        """
        #  copy the data source to a new object
        #  (Hive deletes/moves the original)
        copy_s3_file(self.input_path, self.data_path)

        # and create the hive script
        self._generate_and_upload_hive_script()

        logger.info("Waiting {} seconds for S3 eventual consistency".format(
            self.s3_sync_wait_time))
        time.sleep(self.s3_sync_wait_time)

        # TODO more options like setting aws region
        conn = EmrConnection(self.aws_access_key_id,
                             self.aws_secret_access_key)

        setup_step = InstallHiveStep(self.hive_version)
        run_step = HiveStep(self.job_name, self.script_path)

        cluster_id = conn.run_jobflow(
            self.job_name,
            self.log_path,
            action_on_failure='CANCEL_AND_WAIT',
            master_instance_type=self.master_instance_type,
            slave_instance_type=self.slave_instance_type,
            ami_version=self.ami_version,
            num_instances=self.num_instances,
            job_flow_role=self.iam_instance_profile,
            service_role=self.iam_service_role)

        conn.add_jobflow_steps(cluster_id, [setup_step, run_step])

        logger.info("Job started on cluster {0}".format(cluster_id))

        self._wait_for_job_to_complete(conn, cluster_id)

        logger.info("Output file is in: {0}".format(self.output_path))
Example #11
File: __init__.py  Project: gmohre/awsbook
    def __init__(self, user=EMR_USER, key=EMR_KEY):
        self.conn = EmrConnection(user, key)
Example #12
                               PageviewsBySubredditAndPath,
                               PageviewsByLanguage, ClickthroughsByCodename,
                               TargetedClickthroughsByCodename,
                               AdImpressionsByCodename,
                               TargetedImpressionsByCodename)

RAW_LOG_DIR = g.RAW_LOG_DIR
PROCESSED_DIR = g.PROCESSED_DIR
AGGREGATE_DIR = g.AGGREGATE_DIR
AWS_LOG_DIR = g.AWS_LOG_DIR

# the "or None" business is so that a blank string becomes None to cause boto
# to look for credentials in other places.
s3_connection = S3Connection(g.TRAFFIC_ACCESS_KEY or None,
                             g.TRAFFIC_SECRET_KEY or None)
emr_connection = EmrConnection(g.TRAFFIC_ACCESS_KEY or None,
                               g.TRAFFIC_SECRET_KEY or None)

traffic_categories = (SitewidePageviews, PageviewsBySubreddit,
                      PageviewsBySubredditAndPath, PageviewsByLanguage,
                      ClickthroughsByCodename, TargetedClickthroughsByCodename,
                      AdImpressionsByCodename, TargetedImpressionsByCodename)

traffic_subdirectories = {
    SitewidePageviews: 'sitewide',
    PageviewsBySubreddit: 'subreddit',
    PageviewsBySubredditAndPath: 'srpath',
    PageviewsByLanguage: 'lang',
    ClickthroughsByCodename: 'clicks',
    TargetedClickthroughsByCodename: 'clicks_targeted',
    AdImpressionsByCodename: 'thing',
    TargetedImpressionsByCodename: 'thingtarget',
}
Example #13
import os
import sys
import dateutil.parser
from dateutil import tz
from boto.emr.connection import EmrConnection
from boto.s3.connection import S3Connection
from ucsd_bigdata.credentials import Credentials
import gzip

if __name__ == "__main__":
    credentials = Credentials()
    aws_access_key_id = credentials.aws_access_key_id
    aws_secret_access_key = credentials.aws_secret_access_key

    emr_conn = EmrConnection(aws_access_key_id, aws_secret_access_key)

    # List EMR Clusters
    clusters = emr_conn.list_clusters(cluster_states=["RUNNING", "WAITING"])

    for index, cluster in enumerate(clusters.clusters):
        print "[%s] %s" % (index, cluster.id)

    # if there is a command line arg, use it for the cluster_id
    if len(sys.argv) > 1:
        cluster_id = sys.argv[1]
    else:
        if len(clusters.clusters) == 0:
            sys.exit("No EMR clusters running.")
        selected_cluster = raw_input("Select a Cluster: ")
        cluster_id = clusters.clusters[int(selected_cluster)].id
Example #14
k = Key(b)
k.key = 'reducer.py'
k.set_contents_from_filename('/Users/winteram/Documents/Teaching/reducer.py')
k.close()

# <codecell>

for word in b.list():
    print word

# <codecell>

### Running code with EMR

emrcon = EmrConnection('<aws access key>',   # placeholders; never commit real keys
                       '<aws secret key>')

# <codecell>

# Using EMR's wordcount example
step = StreamingStep(
    name='My wordcount example',
    mapper='s3n://elasticmapreduce/samples/wordcount/wordSplitter.py',
    reducer='aggregate',
    input='s3n://elasticmapreduce/samples/wordcount/input',
    output='s3n://wambia660fall2013/output/wordcount_output')

# <codecell>

jobid = emrcon.run_jobflow(name='Word Count Example',
                           log_uri='s3://wambia660fall2013/logs',
                           steps=[step])
Example #15
    def _emr_connect(self):
        """Connect to EMR."""
        self.emr_conn = EmrConnection(
            aws_access_key_id=self.access_key_id,
            aws_secret_access_key=self.secret_access_key)
Example #16
def create_emr_cluster(cr):
    """
    @PARAM:  Cluster configuration reader object
    Creates an EMR cluster given a set of configuration parameters
    Return:  EMR Cluster ID
    """

    #region = cr.get_config("aws_region")
    #conn = boto.emr.connect_to_region(region)
    conn = EmrConnection(
        cr.get_config("aws_access_key"),
        cr.get_config("aws_secret_key"),
        region=RegionInfo(name=cr.get_config("aws_region"),
                          endpoint=cr.get_config("aws_region") +
                          ".elasticmapreduce.amazonaws.com"))

    #  Create list of instance groups:  master, core, and task
    instance_groups = []
    instance_groups.append(
        InstanceGroup(num_instances=cr.get_config("emr_master_node_count"),
                      role="MASTER",
                      type=cr.get_config("emr_master_node_type"),
                      market=cr.get_config("emr_market_type"),
                      name="Master Node"))

    instance_groups.append(
        InstanceGroup(num_instances=cr.get_config("emr_core_node_count"),
                      role="CORE",
                      type=cr.get_config("emr_core_node_type"),
                      market=cr.get_config("emr_market_type"),
                      name="Core Node"))

    #  Only create task nodes if specifically asked for
    if cr.get_config("emr_task_node_count") > 0:
        instance_groups.append(
            InstanceGroup(num_instances=cr.get_config("emr_task_node_count"),
                          role="TASK",
                          type=cr.get_config("emr_task_node_type"),
                          market=cr.get_config("emr_market_type"),
                          name="Task Node"))

    print "Creating EMR Cluster with instance groups: {0}".format(
        instance_groups)

    #  Use these params to add overrides; these will go away in Boto3
    api_params = {
        "Instances.Ec2SubnetId": cr.get_config("aws_subnet_id"),
        "ReleaseLabel": cr.get_config("emr_version")
    }

    #  Add step to load data
    step_args = [
        "s3-dist-cp", "--s3Endpoint=s3-us-west-1.amazonaws.com",
        "--src=s3://alpine-qa/automation/automation_test_data/",
        "--dest=hdfs:///automation_test_data", "--srcPattern=.*[a-zA-Z,]+"
    ]
    step = JarStep(name="s3distcp for data loading",
                   jar="command-runner.jar",
                   step_args=step_args,
                   action_on_failure="CONTINUE")

    cluster_id = conn.run_jobflow(
        cr.get_config("emr_cluster_name"),
        instance_groups=instance_groups,
        action_on_failure="TERMINATE_JOB_FLOW",
        keep_alive=True,
        enable_debugging=True,
        log_uri=cr.get_config("emr_log_uri"),
        #hadoop_version = "Amazon 2.7.2",
        #ReleaseLabel = "emr-5.0.0",
        #ami_version = "5.0.0",
        steps=[step],
        bootstrap_actions=[],
        ec2_keyname=cr.get_config("ec2_keyname"),
        visible_to_all_users=True,
        job_flow_role="EMR_EC2_DefaultRole",
        service_role="EMR_DefaultRole",
        api_params=api_params)

    print "EMR Cluster created, cluster id: {0}".format(cluster_id)
    state = conn.describe_cluster(cluster_id).status.state
    while state not in (u'COMPLETED', u'SHUTTING_DOWN', u'FAILED', u'WAITING'):
        # sleep, then re-check the cluster status
        time.sleep(5)
        state = conn.describe_cluster(cluster_id).status.state
        print "State is: {0}, sleeping 5s...".format(state)

    if state in (u'SHUTTING_DOWN', u'FAILED'):
        return "ERROR"

    #Check if the state is WAITING. Then launch the next steps
    if state == u'WAITING':
        #Finding the master node dns of EMR cluster
        master_dns = conn.describe_cluster(cluster_id).masterpublicdnsname
        print "DNS Name: {0}".format(master_dns)
        return cluster_id
Example #17
from boto.emr.connection import EmrConnection
from boto.emr.step import InstallPigStep, PigStep

AWS_ACCESS_KEY = ''  # REQUIRED
AWS_SECRET_KEY = ''  # REQUIRED
conn = EmrConnection(AWS_ACCESS_KEY, AWS_SECRET_KEY)

pig_file = 's3://elasticmapreduce/samples/pig-apache/do-reports2.pig'
INPUT = 's3://elasticmapreduce/samples/pig-apache/input/access_log_1'
OUTPUT = ''  # REQUIRED, S3 bucket for job output

pig_args = ['-p', 'INPUT=%s' % INPUT, '-p', 'OUTPUT=%s' % OUTPUT]
pig_step = PigStep('Process Reports', pig_file, pig_args=pig_args)
steps = [InstallPigStep(), pig_step]

conn.run_jobflow(name='report test',
                 steps=steps,
                 hadoop_version='0.20.205',
                 ami_version='latest',
                 num_instances=2,
                 keep_alive=False)
Example #18
def create_data_source_variable(cluster_id, cr):
    """
    Creates a data source variable file using the ID of an EMR cluster
    @PARAM:  cluster_id:  ID of an EMR cluster
    return:  True on success; creates a file 'emr_default.conf' in the pwd

    Object created should look like:

    HADOOP_DATA_SOURCE_NAME="emr_data_source"
    HADOOP_DATA_SOURCE_DISTRO="Cloudera CDH5.4-5.7"
    HADOOP_DATA_SOURCE_HOST="emr_master_dns_hostname"
    HADOOP_DATA_SOURCE_PORT=8020
    HADOOP_DATA_SOURCE_USER="******"
    HADOOP_DATA_SOURCE_GROUP="hadoop"
    HADOOP_DATA_SOURCE_JT_HOST="emr_master_dns_hostname"
    HADOOP_DATA_SOURCE_JT_PORT=8032
    CONNECTION_PARAMETERS='[{"key":"mapreduce.jobhistory.address", "value":"0.0.0.0:10020"}, ' \
                            '{"key":"mapreduce.jobhistory.webapp.address", "value":"cdh5hakerberosnn.alpinenow.local:19888"}, ' \
                            '{"key":"yarn.app.mapreduce.am.staging-dir", "value":"/tmp/hadoop-yarn/staging"}, ' \
                            '{"key":"yarn.resourcemanager.admin.address", "value":"cdh5hakerberosnn.alpinenow.local:8033"}, ' \
                            '{"key":"yarn.resourcemanager.resource-tracker.address", "value":"cdh5hakerberosnn.alpinenow.local:8031"}, ' \
                            '{"key":"yarn.resourcemanager.scheduler.address", "value":"cdh5hakerberosnn.alpinenow.local:8030"}]'

    """
    conn = EmrConnection(
        cr.get_config("aws_access_key"),
        cr.get_config("aws_secret_key"),
        region=RegionInfo(name=cr.get_config("aws_region"),
                          endpoint=cr.get_config("aws_region") +
                          ".elasticmapreduce.amazonaws.com"))

    emr_cluster = conn.describe_cluster(cluster_id)
    master_dns_hostname = emr_cluster.masterpublicdnsname

    # Build up connection parameters
    conn_params = []
    conn_params.append({
        "key": "mapreduce.jobhistory.address",
        "value": "{0}:10020".format(master_dns_hostname)
    })
    conn_params.append({
        "key": "mapreduce.jobhistory.webapp.address",
        "value": "{0}:19888".format(master_dns_hostname)
    })
    conn_params.append({
        "key": "yarn.app.mapreduce.am.staging-dir",
        "value": "/user"
    })
    conn_params.append({
        "key": "yarn.resourcemanager.admin.address",
        "value": "{0}:8033".format(master_dns_hostname)
    })
    conn_params.append({
        "key": "yarn.resourcemanager.scheduler.address",
        "value": "{0}:8030".format(master_dns_hostname)
    })
    conn_params_str = "CONNECTION_PARAMETERS=\"{0}\"".format(conn_params)
    email_str = "EMAIL=\"avalanche_{0}.alpinenow.com\"".format(
        random.randint(1, 99999))

    with open("emr_default.conf", "w") as f:
        f.writelines("HADOOP_DATA_SOURCE_NAME=\"{0}\"\n".format(
            cr.get_config("emr_cluster_name")))
        f.writelines(
            "HADOOP_DATA_SOURCE_DISTRO=\"{0}\"\n".format("Amazon EMR5"))
        f.writelines(
            "HADOOP_DATA_SOURCE_HOST=\"{0}\"\n".format(master_dns_hostname))
        f.writelines("HADOOP_DATA_SOURCE_POST=\"8020\"\n")
        f.writelines("HADOOP_DATA_SOURCE_USER=\"hdfs\"\n")
        f.writelines("HADOOP_DATA_SOURCE_GROUP=\"hadoop\"\n")
        f.writelines(
            "HADOOP_DATA_SOURCE_JT_HOST=\"{0}\"\n".format(master_dns_hostname))
        f.writelines("HADOOP_DATA_SOURCE_JT_PORT=\"8032\"\n")
        f.writelines(email_str)
        f.writelines(conn_params_str)
Example #19
from boto.emr.bootstrap_action import BootstrapAction
from boto.emr.connection import EmrConnection

# Description:
# BootstrapAction is an object representing a bootstrap action in Elastic Map
# Reduce (EMR), a script that gets run before the EMR job executes.

# initialize a bootstrap action
bootstrapSetup = BootstrapAction("Bootstrap Name",
                                 "s3://<my-bucket>/<my-bootstrap-action>",
                                 ["arg1=hello", "arg2=world"])

# initialize emr connection
emr_job = EmrConnection("<aws-access-key-id>", "<aws-secret-access-key>")

# run emr job flow with the defined bootstrap action
# (run_jobflow requires a name as its first argument)
emr_job.run_jobflow(name='bootstrap example',
                    bootstrap_actions=[bootstrapSetup])
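
run_jobflow accepts many more settings; a hedged fuller sketch with a log URI and explicit instance configuration (the bucket and instance types are placeholders):

emr_job.run_jobflow(name='bootstrap demo',
                    log_uri='s3://<my-bucket>/logs',
                    master_instance_type='m1.small',
                    slave_instance_type='m1.small',
                    num_instances=2,
                    bootstrap_actions=[bootstrapSetup])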
Example #20
from boto.emr.connection import EmrConnection
from boto.emr.step import StreamingStep
import boto

AWS_KEY = '<aws-access-key-id>'          # placeholders; never commit real keys
AWS_SECRET = '<aws-secret-access-key>'

conn = EmrConnection(AWS_KEY, AWS_SECRET)

step = StreamingStep(name='My wordcount example',
                      mapper='s3n://css739/wordcount/bigramSplitter.py',
                      reducer='aggregate',
                      input='s3n://smalldata/wikipedia_titles.txt',
                      output='s3n://css739/wordcount/bigram_count_output2',
                      cache_files=['s3n://css739/wordcount/english_stoplist.py'])
                      
                      
jobid = conn.run_jobflow(name='My jobflow',
                         log_uri='s3n://css739/wordcount/jobflow_logs',
                         steps=[step])

print conn.describe_jobflow(jobid).state
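
describe_jobflow returns a point-in-time snapshot, so scripts normally poll it; a hedged polling sketch for the job flow started above:

import time

# Poll until the job flow reaches a terminal state.
while True:
    state = conn.describe_jobflow(jobid).state
    print "Job flow {0} is {1}".format(jobid, state)
    if state in ('COMPLETED', 'FAILED', 'TERMINATED'):
        break
    time.sleep(30)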
    def post(self):
        if not boto.config.has_section('Boto'):
            boto.config.add_section('Boto')
        boto.config.set('Boto', 'https_validate_certificates', 'False')
        note = ''
        data_para = [0, 0, 0, 0, 0]
        s3_connection = S3Connection(access_id, access_key)
        bucket = s3_connection.get_bucket('bucket774')
        k = Key(bucket)
        k.key = 'temp_para.json'
        temp_para = json.loads(k.get_contents_as_string())
        if (temp_para[6] == 1):
            k.key = 'cluster_id'
            cluster_id = k.get_contents_as_string()
            conn = EmrConnection(access_id, access_key)
            if (temp_para[7] == 0):
                status = conn.describe_cluster(cluster_id)
                if (status.status.state == 'WAITING'):
                    PYdata = get_output()
                    conn.terminate_jobflow(cluster_id)
                    data = in_circle_to_pi(PYdata, temp_para[0])
                    k.key = 'temp_para.json'
                    temp_para[6] = 0
                    k.set_contents_from_string(json.dumps(temp_para))
                    data_para[0:4] = temp_para[0:4]
                    data_para[4] = json.loads(data)[-1]
                    note = 'last emr job done, result has been updated'
                    save_result(data, json.dumps(data_para))

                else:
                    note = 'last emr calculation has not finished, please wait.'
                    k.key = 'record.json'
                    data = k.get_contents_as_string()
                    k.key = 'record_para.json'
                    data_para_json = k.get_contents_as_string()
                    data_para = json.loads(data_para_json)
            elif (temp_para[7] == 1):
                status = conn.describe_cluster(cluster_id)
                if (status.status.state == 'WAITING'):
                    k.key = 'temp_data.json'
                    PYdata = np.array(json.loads(k.get_contents_as_string()))
                    PYdata += get_output()
                    if (round(
                            np.sum(PYdata) / (temp_para[3] * temp_para[5]),
                            temp_para[4]) == round(math.pi, temp_para[4])):
                        for i in range(1, len(PYdata)):
                            PYdata[i] += PYdata[i - 1]
                            PYdata[i - 1] /= temp_para[0] * (i) * temp_para[5]
                        PYdata[len(PYdata) -
                               1] /= temp_para[0] * len(PYdata) * temp_para[5]
                        data = json.dumps(
                            PYdata.tolist())  # convert numpy array to list

                        k.key = 'temp_para.json'
                        temp_para[6] = 0
                        k.set_contents_from_string(json.dumps(temp_para))
                        data_para[0:4] = temp_para[0:4]
                        data_para[4] = json.loads(data)[-1]
                        conn.terminate_jobflow(cluster_id)
                        note = 'last emr job done, result has been updated'
                        save_result(data, json.dumps(data_para))
                    else:
                        note = str(np.sum(PYdata)) + ',' + str(
                            temp_para[3]) + ',' + str(temp_para[5])
                        add_step_emr(conn, cluster_id)
                        save_temp_result(PYdata)
                        for key in bucket.list(prefix='output/'):
                            key.delete()
                        temp_para[5] += 1
                        k.key = 'temp_para.json'
                        k.set_contents_from_string(json.dumps(temp_para))
                        # note: accuracy target not reached in last run, keep working
                        k.key = 'record.json'
                        data = k.get_contents_as_string()
                        k.key = 'record_para.json'
                        data_para_json = k.get_contents_as_string()
                        data_para = json.loads(data_para_json)
                else:
                    note = 'last emr calculation has not finished, please wait.'
                    k.key = 'record.json'
                    data = k.get_contents_as_string()
                    k.key = 'record_para.json'
                    data_para_json = k.get_contents_as_string()
                    data_para = json.loads(data_para_json)
        else:
            k.key = 'record.json'
            data = k.get_contents_as_string()
            k.key = 'record_para.json'
            data_para_json = k.get_contents_as_string()
            data_para = json.loads(data_para_json)

        doRender(
            self, 'chart.htm', {
                'Data': data,
                'shots_each_threat': data_para[0],
                'R': data_para[1],
                'Q': data_para[2],
                'pi': math.pi,
                'shots': data_para[3],
                'result': data_para[4],
                'note': note
            })