Ejemplo n.º 1
0
    def script_runner(self,
                      user_pem=None,
                      user_script_name=None,
                      quiet_mode=False):
        """
        Copy a shell script to the cluster's active instances and run it.

        The script is copied under ``/home/hadoop/`` on every host returned
        by ``self.active_instances`` and then executed with sudo after being
        made executable.

        :param user_pem: path to the private key used for SSH; when ``None``
            the value of ``self.config["PemFilePath"]`` is used.
        :param user_script_name: local script path (absolute, or relative to
            the current working directory); when ``None`` the value of
            ``self.config["ScriptToRun"]["File"]`` is used.
        :param quiet_mode: when true, print only each host's stderr;
            otherwise print only each host's stdout.
        :return: the hosts the script was run on.
        :raises ValueError: if no script or no PEM path is available, or if
            listing the cluster instances does not return HTTP 200.
        """
        script_name = (user_script_name if user_script_name is not None else
                       self.config["ScriptToRun"]["File"])
        pem_path = (user_pem if user_pem is not None else
                    self.config["PemFilePath"])

        # Validate inputs up front so the happy path below stays flat.
        if script_name is None:
            raise ValueError("script runner shell script not specified")
        if pem_path is None:
            raise ValueError(
                'pem_file_path is not specified in emrcliconfig_inst_fleets.yaml "pem_file_path:%s"'
                % pem_path)

        job_flow_id = self.find_job_flow_id()
        response = self.emr.list_instances(ClusterId=job_flow_id, )
        response_code = response['ResponseMetadata']['HTTPStatusCode']
        if response_code != 200:
            # Report the status code itself, not the whole response payload.
            raise ValueError(
                "Could not list instances (status code {})".format(
                    response_code))

        hosts = self.active_instances(response)

        print(hosts)

        client = ParallelSSHClient(hosts, user='******', pkey=pem_path)

        if script_name.startswith("/"):
            # Absolute path: only the file name is kept on the remote side.
            to_script_name = "/home/hadoop/{}".format(
                os.path.basename(script_name))
            from_script_name = script_name
        else:
            # Relative path: resolved against the current working directory
            # locally and mirrored under /home/hadoop remotely.
            to_script_name = "/home/hadoop/{}".format(script_name)
            from_script_name = os.path.join(os.getcwd(), script_name)

        logger.info("Copying script {} to {}".format(from_script_name,
                                                     to_script_name))

        copy_files = client.copy_file(from_script_name, to_script_name)
        # Wait for every copy to finish; a failed copy raises here.
        joinall(copy_files, raise_error=True)

        logger.info("Finished copying script {} to {}".format(
            from_script_name, to_script_name))

        logger.info("Running script {}".format(to_script_name))

        output = client.run_command("chmod +x {} && {}".format(
            to_script_name, to_script_name),
                                    sudo=True)

        for host_output in output.values():
            stream = host_output.stderr if quiet_mode else host_output.stdout
            for line in stream:
                print(line)

        logger.info("Finished script {}".format(to_script_name))

        return hosts
Ejemplo n.º 2
0
    def install(self, user_pem=None, quiet_mode=False):
        """
        Build the local package and pip-install it on the cluster's hosts.

        The cluster must be in the ``WAITING`` or ``RUNNING`` state and carry
        a tag marking it as a local environment: a tag whose key contains
        ``Description`` with a value containing ``env=local``, or a tag whose
        key contains ``Name`` with a value containing ``local``.

        :param user_pem: path to the private key used for SSH; when ``None``
            the value of ``self.config["PemFilePath"]`` is used.
        :param quiet_mode: when true, print only each host's stderr;
            otherwise print only each host's stdout.
        :return: ``None``.
        :raises ValueError: if the cluster is not active or is not tagged as
            a local cluster.
        """
        pem_path = (user_pem if user_pem is not None else
                    self.config["PemFilePath"])
        cluster_id = self.find_job_flow_id()
        desc_cluster = self.emr.describe_cluster(ClusterId=cluster_id)
        cluster_state = desc_cluster['Cluster']['Status']['State']
        if cluster_state not in ['WAITING', 'RUNNING']:
            raise ValueError("Cluster is not active")
        tags_list = desc_cluster['Cluster']['Tags']

        valid_description = ["env=local"]
        valid_names = ['local']

        def _marks_local(tag):
            # Substring matching on both key and value, as before.
            key, value = tag['Key'], tag['Value']
            if 'Description' in key and any(allowed in value
                                            for allowed in valid_description):
                return True
            return 'Name' in key and any(allowed in value
                                         for allowed in valid_names)

        # Refuse to deploy unless at least one tag marks the cluster as local.
        # (The previous check was inverted and rejected exactly the clusters
        # that carried a valid tag.)
        if not any(_marks_local(tag) for tag in tags_list):
            print("Cluster tags should contain Key=Name, Value='local']")
            print(
                "Cluster tags should contain Key=Description, Value='env=local']"
            )
            raise ValueError(
                "Error: Local build can not deployed on this cluster {0}".
                format(cluster_id))

        tar_file_name, tar_file_location = HandleEMRCommands.build()[:2]

        # NOTE(review): with no PEM path, or a non-200 list_instances
        # response, nothing is deployed and no error is raised; this mirrors
        # the existing behaviour -- confirm whether it should raise instead.
        if pem_path is not None:
            response = self.emr.list_instances(ClusterId=cluster_id, )
            response_code = response['ResponseMetadata']['HTTPStatusCode']
            if response_code == 200:

                hosts = self.active_instances(response)

                print(hosts)

                client = ParallelSSHClient(hosts, user='******', pkey=pem_path)
                copy_files = client.copy_file(tar_file_location,
                                              '/home/hadoop/' + tar_file_name)
                # Wait for every copy to finish; a failed copy raises here.
                joinall(copy_files, raise_error=True)

                output = client.run_command(
                    "python3 -m pip install --upgrade --no-deps --force-reinstall /home/hadoop/"
                    + tar_file_name,
                    sudo=True)
                for host_output in output.values():
                    stream = (host_output.stderr
                              if quiet_mode else host_output.stdout)
                    for line in stream:
                        print(line)
                print("Deployed to all nodes")

        return