def ensure_openssh_version(os_user):
    try:
        if not exists(datalab.fab.conn,
                      '/home/{}/.ensure_dir/openssh_version_ensured'.format(os_user)):
            if os.environ['conf_openssh_version'] not in datalab.fab.conn.sudo('ssh -V').stdout:
                datalab.fab.conn.sudo('mkdir /var/lib/sshd')
                datalab.fab.conn.sudo('chmod -R 700 /var/lib/sshd/')
                datalab.fab.conn.sudo('chown -R root:sys /var/lib/sshd/')
                datalab.fab.conn.sudo(
                    'wget -c https://cdn.openbsd.org/pub/OpenBSD/OpenSSH/portable/openssh-{0}.tar.gz '
                    '-O /tmp/openssh-{0}.tar.gz'.format(os.environ['conf_openssh_version']))
                datalab.fab.conn.sudo(
                    'bash -l -c "tar -zhxvf /tmp/openssh-{0}.tar.gz -C /tmp/; '
                    'cd /tmp/openssh-{0}; ./configure; make; make install"'.format(
                        os.environ['conf_openssh_version']))
            datalab.fab.conn.sudo(
                'touch /home/{}/.ensure_dir/openssh_version_ensured'.format(os_user))
    except Exception as err:
        logging.error('Updating openssh version error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def configure_rstudio():
    if not os.path.exists('/home/' + args.os_user + '/.ensure_dir/rstudio_dataengine-service_ensured'):
        try:
            subprocess.run('echo "export R_LIBS_USER='******'/R/lib:" >> /home/' + args.os_user + '/.bashrc',
                           shell=True, check=True)
            subprocess.run("sed -i 's/^SPARK_HOME/#SPARK_HOME/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run('echo \'SPARK_HOME="' + spark_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'YARN_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'HADOOP_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run("sed -i 's/^master/#master/' /home/" + args.os_user + "/.Rprofile",
                           shell=True, check=True)
            subprocess.run('''R -e "source('/home/{}/.Rprofile')"'''.format(args.os_user),
                           shell=True, check=True)
            subprocess.run('touch /home/' + args.os_user + '/.ensure_dir/rstudio_dataengine-service_ensured',
                           shell=True, check=True)
        except Exception as err:
            logging.error('Error: {0}'.format(err))
            sys.exit(1)
    else:
        try:
            subprocess.run("sed -i '/R_LIBS_USER/ { s|=\(.*\)|=\\1" + spark_dir + "/R/lib:| }' /home/" +
                           args.os_user + "/.bashrc", shell=True, check=True)
            subprocess.run("sed -i 's/^SPARK_HOME/#SPARK_HOME/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run("sed -i 's/^YARN_CONF_DIR/#YARN_CONF_DIR/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run("sed -i 's/^HADOOP_CONF_DIR/#HADOOP_CONF_DIR/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run("sed -i 's/^master/#master/' /home/" + args.os_user + "/.Rprofile",
                           shell=True, check=True)
            subprocess.run('echo \'SPARK_HOME="' + spark_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'YARN_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'HADOOP_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('''R -e "source('/home/{}/.Rprofile')"'''.format(args.os_user),
                           shell=True, check=True)
        except Exception as err:
            logging.error('Error: {0}'.format(err))
            sys.exit(1)
def find_java_path_remote():
    try:
        java_path = datalab.fab.conn.sudo(
            "sh -c \"update-alternatives --query java | grep 'Value: ' | grep -o '/.*/jre'\"").stdout.replace('\n', '')
        return java_path
    except Exception as err:
        logging.error('Finding remote java path error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def ensure_java(os_user):
    try:
        if not exists(datalab.fab.conn, '/home/{}/.ensure_dir/java_ensured'.format(os_user)):
            manage_pkg('-y install', 'remote', 'openjdk-8-jdk-headless')
            datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/java_ensured'.format(os_user))
    except Exception as err:
        logging.error('Installing Java error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def remove_user_key(args):
    try:
        client = boto3.client('s3', config=botoConfig(signature_version='s3v4'),
                              region_name=args.region)
        client.delete_object(Bucket=args.s3_bucket, Key=args.project_name + '.pub')
    except Exception as err:
        logging.error("Unable to remove user key: " + str(err) +
                      "\n Traceback: " + traceback.format_exc())
def disable_edge_scp_binary(os_user):
    try:
        if not exists(datalab.fab.conn,
                      '/home/{}/.ensure_dir/disabled_scp_binary'.format(os_user)):
            datalab.fab.conn.sudo('mv /usr/bin/scp /usr/bin/scp_disabled')
            datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/disabled_scp_binary'.format(os_user))
    except Exception as err:
        logging.error('Disabling scp binary error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def upload_jars_parser(args):
    try:
        s3 = boto3.resource('s3', config=botoConfig(signature_version='s3v4'))
        s3.meta.client.upload_file('/root/scripts/dataengine-service_jars_parser.py',
                                   args.s3_bucket, 'jars_parser.py',
                                   ExtraArgs={'ServerSideEncryption': 'AES256'})
    except Exception as err:
        logging.error("Unable to upload jars to s3: " + str(err) +
                      "\n Traceback: " + traceback.format_exc())
def find_java_path_local():
    try:
        java_path = subprocess.run(
            "sh -c \"update-alternatives --query java | grep 'Value: ' | grep -o '/.*/jre'\"",
            capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip("\n\r")
        return java_path
    except Exception as err:
        logging.error('Finding local java path error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def ensure_pkg(os_user,
               requisites='linux-headers-$(uname -r) python3-pip python3-dev python3-virtualenv '
                          'groff gcc vim less git wget '
                          'libssl-dev unattended-upgrades nmap '
                          'libffi-dev unzip libxml2-dev haveged'):
    try:
        if not exists(datalab.fab.conn, '/home/{}/.ensure_dir/pkg_upgraded'.format(os_user)):
            count = 0
            check = False
            while not check:
                if count > 60:
                    logging.error("Repositories are not available. Please, try again later.")
                    sys.exit(1)
                else:
                    try:
                        logging.info("Updating repositories "
                                     "and installing requested tools: {}".format(requisites))
                        logging.info("Attempt number " + str(count) +
                                     " to install requested tools. Max 60 tries.")
                        manage_pkg('update', 'remote', '')
                        manage_pkg('-y install', 'remote', requisites)
                        datalab.fab.conn.sudo('unattended-upgrades -v')
                        datalab.fab.conn.sudo(
                            'sed -i \'s|APT::Periodic::Unattended-Upgrade "1"|APT::Periodic::Unattended-Upgrade "0"|\' '
                            '/etc/apt/apt.conf.d/20auto-upgrades')
                        datalab.fab.conn.run('export LC_ALL=C')
                        datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/pkg_upgraded'.format(os_user))
                        datalab.fab.conn.sudo('systemctl enable haveged')
                        datalab.fab.conn.sudo('systemctl start haveged')
                        if os.environ['conf_cloud_provider'] == 'aws':
                            manage_pkg('-y install --install-recommends', 'remote', 'linux-aws-hwe')
                        check = True
                    except Exception:
                        count += 1
                        time.sleep(50)
    except Exception as err:
        logging.error('Installing prerequisites packages error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def ensure_step(os_user):
    try:
        if not exists(datalab.fab.conn, '/home/{}/.ensure_dir/step_ensured'.format(os_user)):
            manage_pkg('-y install', 'remote', 'wget')
            datalab.fab.conn.sudo(
                'wget https://github.com/smallstep/cli/releases/download/v0.13.3/step-cli_0.13.3_amd64.deb '
                '-O /tmp/step-cli_0.13.3_amd64.deb')
            datalab.fab.conn.sudo('dpkg -i /tmp/step-cli_0.13.3_amd64.deb')
            datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/step_ensured'.format(os_user))
    except Exception as err:
        logging.error('Installing step-cli error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def upload_user_key(args):
    try:
        s3 = boto3.resource('s3', config=botoConfig(signature_version='s3v4'))
        s3.meta.client.upload_file(args.key_dir + '/' + args.project_name + '.pub',
                                   args.s3_bucket,
                                   args.project_name + '/' + args.project_name + '.pub',
                                   ExtraArgs={'ServerSideEncryption': 'AES256'})
        s3.meta.client.upload_file('/root/scripts/dataengine-service_key_importer.py',
                                   args.s3_bucket, 'key_importer.py',
                                   ExtraArgs={'ServerSideEncryption': 'AES256'})
    except Exception as err:
        logging.error("Unable to upload user key to s3: " + str(err) +
                      "\n Traceback: " + traceback.format_exc())
def get_object_count(bucket, prefix):
    try:
        s3_cli = boto3.client('s3', config=botoConfig(signature_version='s3v4'),
                              region_name=args.region)
        content = s3_cli.get_paginator('list_objects')
        file_list = []
        try:
            for i in content.paginate(Bucket=bucket, Delimiter='/', Prefix=prefix):
                for file in i.get('Contents'):
                    file_list.append(file.get('Key'))
            count = len(file_list)
        except Exception:
            logging.info("{} does not exist yet. Waiting...".format(prefix))
            count = 0
        return count
    except Exception as err:
        logging.error("Unable to get objects from s3: " + str(err) +
                      "\n Traceback: " + traceback.format_exc())
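# Hedged usage sketch (bucket name and release label below are hypothetical):
# build_emr_cluster() further down uses the same call to decide whether the EMR
# jars still need to be copied into the bucket.
# jars_exist = get_object_count('my-datalab-bucket', 'jars/emr-5.30.0/lib/')
# if jars_exist == 0:
#     pass  # jars are not uploaded yet; include the copy step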
def ensure_ntpd(os_user, edge_private_ip=''):
    try:
        if not exists(datalab.fab.conn, '/home/{}/.ensure_dir/ntpd_ensured'.format(os_user)):
            datalab.fab.conn.sudo('timedatectl set-ntp no')
            manage_pkg('-y install', 'remote', 'ntp ntpdate')
            datalab.fab.conn.sudo('bash -c \"echo "tinker panic 0" >> /etc/ntp.conf\"')
            if os.environ['conf_resource'] != 'ssn' and os.environ['conf_resource'] != 'edge':
                datalab.fab.conn.sudo(
                    'bash -c \"echo "server {} prefer iburst" >> /etc/ntp.conf\"'.format(edge_private_ip))
            datalab.fab.conn.sudo('systemctl restart ntp')
            datalab.fab.conn.sudo('systemctl enable ntp')
            datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/ntpd_ensured'.format(os_user))
    except Exception as err:
        logging.error('Installing NTPD error: {}'.format(str(err)))
        traceback.print_exc()
        sys.exit(1)
def get_instance_by_ip(ip):
    try:
        ec2 = boto3.resource('ec2')
        check = bool(re.match(r"^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$", ip))
        if check:
            instances = ec2.instances.filter(
                Filters=[{'Name': 'private-ip-address', 'Values': [ip]}])
        else:
            instances = ec2.instances.filter(
                Filters=[{'Name': 'private-dns-name', 'Values': [ip]}])
        for instance in instances:
            return instance
    except Exception as err:
        logging.error("Unable to get instance by ip: " + str(err) +
                      "\n Traceback: " + traceback.format_exc())
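# Hedged usage sketch (addresses are hypothetical): a dotted-quad argument matches
# the regex above and is looked up with the 'private-ip-address' filter, while
# anything else is treated as a private DNS name.
# instance = get_instance_by_ip('10.0.1.25')
# instance = get_instance_by_ip('ip-10-0-1-25.ec2.internal')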
def parse_steps(step_string):
    try:
        parser = re.split('; |;', step_string)
        steps = []
        for i in parser:
            step_parser = re.split(', |,', i)
            task = {}
            hdp_jar_step = {}
            for j in step_parser:
                key, value = j.split("=")
                if key == "Args":
                    value = value.split(" ")
                    hdp_jar_step.update({key: value})
                elif key == "Jar":
                    hdp_jar_step.update({key: value})
                else:
                    task.update({key: value})
            task.update({"HadoopJarStep": hdp_jar_step})
            steps.append(task)
        return steps
    except Exception as err:
        logging.error("Failed to parse steps: " + str(err) +
                      "\n Traceback: " + traceback.format_exc())
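# Hedged example of the step-string format parse_steps() expects (values are
# hypothetical). 'Jar' and 'Args' land in HadoopJarStep, 'Args' is further split
# on spaces, and every other key stays on the step itself:
#
#   parse_steps('Name=CopyJars,ActionOnFailure=CONTINUE,'
#               'Jar=command-runner.jar,Args=aws s3 cp s3://bucket/jar /tmp/')
#   == [{'Name': 'CopyJars',
#        'ActionOnFailure': 'CONTINUE',
#        'HadoopJarStep': {'Jar': 'command-runner.jar',
#                          'Args': ['aws', 's3', 'cp', 's3://bucket/jar', '/tmp/']}}]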
    try:
        logging.info('[TERMINATE PROJECT]')
        try:
            terminate_edge_node(project_conf['tag_name'], project_conf['project_name'],
                                project_conf['tag_value'], project_conf['nb_sg'],
                                project_conf['edge_sg'], project_conf['de_sg'],
                                project_conf['emr_sg'], project_conf['endpoint_name'],
                                project_conf['service_base_name'])
        except Exception as err:
            traceback.print_exc()
            datalab.fab.append_result("Failed to terminate project.", str(err))
    except Exception as err:
        logging.error('Error: {0}'.format(err))
        sys.exit(1)

    try:
        endpoint_id = datalab.meta_lib.get_instance_by_name(
            project_conf['tag_name'], project_conf['endpoint_instance_name'])
        logging.info("Endpoint id: " + endpoint_id)
        ec2 = boto3.client('ec2')
        ec2.delete_tags(Resources=[endpoint_id],
                        Tags=[{'Key': 'project_tag'}, {'Key': 'endpoint_tag'}])
    except Exception as err:
        logging.error("Failed to remove project tag from endpoint: {}".format(str(err)))
            datalab.actions_lib.create_tag(data_engine['slave_id'],
                                           data_engine['cluster_nodes_tag'], False)
            datalab.actions_lib.create_tag(data_engine['slave_id'],
                                           data_engine['cluster_nodes_resource_tag'], False)
            datalab.actions_lib.create_tag(data_engine['slave_id'],
                                           data_engine['cluster_nodes_billing_tag'], False)
            datalab.actions_lib.create_tag(data_engine['slave_id'],
                                           data_engine['cluster_nodes_tag_type'], False)
        except:
            traceback.print_exc()
            raise Exception
    except Exception as err:
        datalab.actions_lib.remove_ec2(data_engine['tag_name'], data_engine['master_node_name'])
        for i in range(data_engine['instance_count'] - 1):
            slave_name = data_engine['slave_node_name'] + '{}'.format(i + 1)
            try:
                datalab.actions_lib.remove_ec2(data_engine['tag_name'], slave_name)
            except:
                logging.error("The slave instance {} hasn't been created.".format(slave_name))
        datalab.fab.append_result("Failed to create slave instances.", str(err))
        sys.exit(1)
    except:
        traceback.print_exc()
        raise Exception


if __name__ == "__main__":
    # creating aws config file
    try:
        logging.info('[CREATE AWS CONFIG FILE]')
        if 'aws_access_key' in os.environ and 'aws_secret_access_key' in os.environ:
            datalab.actions_lib.create_aws_config_files(generate_full_config=True)
        else:
            datalab.actions_lib.create_aws_config_files()
    except Exception as err:
        logging.error('Unable to create configuration')
        datalab.fab.append_result("Unable to create configuration", err)
        traceback.print_exc()
        sys.exit(1)

    # deriving variables for ssn node deployment
    try:
        logging.info('[DERIVING NAMES]')
        ssn_conf = dict()
        ssn_conf['service_base_name'] = os.environ['conf_service_base_name'] = \
            datalab.fab.replace_multi_symbols(os.environ['conf_service_base_name'][:20], '-', True)
        ssn_conf['role_name'] = '{}-ssn-role'.format(ssn_conf['service_base_name'])
        ssn_conf['role_profile_name'] = '{}-ssn-profile'.format(ssn_conf['service_base_name'])
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # # ****************************************************************************** import argparse from datalab.actions_lib import * from datalab.meta_lib import * from datalab.logger import logging parser = argparse.ArgumentParser() parser.add_argument('--resource_group_name', type=str, default='') parser.add_argument('--region', type=str, default='') args = parser.parse_args() if __name__ == "__main__": if args.resource_group_name != '': if AzureMeta().get_resource_group(args.resource_group_name): logging.info("REQUESTED RESOURCE GROUP {} EXISTS".format( args.resource_group_name)) else: logging.info("Creating Resource Group {}".format( args.resource_group_name)) AzureActions().create_resource_group(args.resource_group_name, args.region) else: logging.error("Resource group name can't be empty.") sys.exit(1)
            notebook_config['image_name'] = os.environ['notebook_image_name']
            logging.info('Pre-configured deeplearning image found. Using: {}'.format(
                notebook_config['image_name']))
        else:
            notebook_config['notebook_image_name'] = (lambda x: '{0}-{1}-{2}-{3}-{4}'.format(
                notebook_config['service_base_name'], notebook_config['project_name'],
                notebook_config['endpoint_name'], os.environ['application'],
                os.environ['notebook_image_name']).replace('_', '-')
                if (x != 'None' and x != '') else notebook_config['expected_image_name'])(
                str(os.environ.get('notebook_image_name')))
            if AzureMeta.get_image(notebook_config['resource_group_name'],
                                   notebook_config['notebook_image_name']):
                notebook_config['image_name'] = notebook_config['notebook_image_name']
                notebook_config['image_type'] = 'pre-configured'
                logging.info('Pre-configured image found. Using: {}'.format(
                    notebook_config['notebook_image_name']))
            else:
                os.environ['notebook_image_name'] = notebook_config['image_name']
                logging.info('No pre-configured image found. Using default one: {}'.format(
                    notebook_config['image_name']))
    except Exception as err:
        logging.error("Failed to generate variables dictionary.")
        datalab.fab.append_result("Failed to generate variables dictionary.", str(err))
        sys.exit(1)

    try:
        edge_status = AzureMeta.get_instance_status(
            notebook_config['resource_group_name'],
            '{0}-{1}-{2}-edge'.format(os.environ['conf_service_base_name'],
                                      notebook_config['project_name'],
                                      notebook_config['endpoint_name']))
        if edge_status != 'running':
            logging.error('Edge node is unavailable! Aborting...')
            ssn_hostname = AzureMeta.get_private_ip_address(
                notebook_config['resource_group_name'],
                os.environ['conf_service_base_name'] + '-ssn')
            datalab.fab.put_resource_status('edge', 'Unavailable',
                                            os.environ['ssn_datalab_path'],
                                            os.environ['conf_os_user'],
try: subprocess.run("~/scripts/{}.py {}".format( 'ssn_create_vpc', params), shell=True, check=True) os.environ['gcp_vpc_name'] = ssn_conf['vpc_name'] except: traceback.print_exc() raise Exception except Exception as err: datalab.fab.append_result("Failed to create VPC.", str(err)) if not ssn_conf['pre_defined_vpc']: try: GCPActions.remove_vpc(ssn_conf['vpc_name']) except: logging.error("VPC hasn't been created.") sys.exit(1) try: ssn_conf['vpc_selflink'] = GCPMeta.get_vpc( ssn_conf['vpc_name'])['selfLink'] if 'gcp_subnet_name' not in os.environ: raise KeyError else: ssn_conf['pre_defined_subnet'] = True ssn_conf['subnet_name'] = os.environ['gcp_subnet_name'] except KeyError: try: logging.info('[CREATE SUBNET]') params = "--subnet_name {} --region {} --vpc_selflink {} --prefix {} --vpc_cidr {} --ssn {}".\ format(ssn_conf['subnet_name'], ssn_conf['region'], ssn_conf['vpc_selflink'], ssn_conf['subnet_prefix'],
def build_emr_cluster(args):
    try:
        # Parse applications
        apps = args.applications.split(" ")
        names = []
        for i in apps:
            names.append({"Name": i})
        # Parse Tags
        parser = re.split('[, ]+', args.tags)
        tags = list()
        for i in parser:
            key, value = i.split("=")
            tags.append({"Value": value, "Key": key})
        tags.append({'Key': os.environ['conf_tag_resource_id'],
                     'Value': '{}:{}'.format(args.service_base_name, args.name)})
        tags.append({'Key': os.environ['conf_billing_tag_key'],
                     'Value': os.environ['conf_billing_tag_value']})
        prefix = "jars/" + args.release_label + "/lib/"
        jars_exist = get_object_count(args.s3_bucket, prefix)
        # Parse steps
        if args.steps != '':
            global cp_config
            cp_config = cp_config + "; " + args.steps
        if args.cp_jars_2_s3 or jars_exist == 0:
            steps = parse_steps(cp_config + "; " + cp_jars)
        else:
            steps = parse_steps(cp_config)
        if args.dry_run:
            logging.info("Build parameters are:")
            logging.info(args)
            logging.info("\n")
            logging.info("Applications to be installed:")
            logging.info(names)
            logging.info("\n")
            logging.info("Cluster tags:")
            logging.info(tags)
            logging.info("\n")
            logging.info("Cluster Jobs:")
            logging.info(steps)
        if not args.dry_run:
            socket = boto3.client('emr')
            if args.slave_instance_spot == 'True':
                result = socket.run_job_flow(
                    Name=args.name,
                    ReleaseLabel=args.release_label,
                    Instances={
                        'Ec2KeyName': args.ssh_key,
                        'KeepJobFlowAliveWhenNoSteps': not args.auto_terminate,
                        'Ec2SubnetId': get_subnet_by_cidr(args.subnet, os.environ['aws_notebook_vpc_id']),
                        'InstanceGroups': [{
                            'Market': 'SPOT',
                            'BidPrice': args.bid_price[:5],
                            'InstanceRole': 'CORE',
                            'InstanceType': args.slave_instance_type,
                            'InstanceCount': int(args.instance_count) - 1
                        }, {
                            'Market': 'ON_DEMAND',
                            'InstanceRole': 'MASTER',
                            'InstanceType': args.master_instance_type,
                            'InstanceCount': 1
                        }],
                        'AdditionalMasterSecurityGroups': [get_security_group_by_name(args.additional_emr_sg)],
                        'AdditionalSlaveSecurityGroups': [get_security_group_by_name(args.additional_emr_sg)]
                    },
                    Applications=names,
                    Tags=tags,
                    Steps=steps,
                    VisibleToAllUsers=not args.auto_terminate,
                    JobFlowRole=args.ec2_role,
                    ServiceRole=args.service_role,
                    Configurations=ast.literal_eval(args.configurations))
            else:
                result = socket.run_job_flow(
                    Name=args.name,
                    ReleaseLabel=args.release_label,
                    Instances={
                        'MasterInstanceType': args.master_instance_type,
                        'SlaveInstanceType': args.slave_instance_type,
                        'InstanceCount': args.instance_count,
                        'Ec2KeyName': args.ssh_key,
                        # 'Placement': {'AvailabilityZone': args.availability_zone},
                        'KeepJobFlowAliveWhenNoSteps': not args.auto_terminate,
                        'Ec2SubnetId': get_subnet_by_cidr(args.subnet, os.environ['aws_notebook_vpc_id']),
                        'AdditionalMasterSecurityGroups': [get_security_group_by_name(args.additional_emr_sg)],
                        'AdditionalSlaveSecurityGroups': [get_security_group_by_name(args.additional_emr_sg)]
                    },
                    Applications=names,
                    Tags=tags,
                    Steps=steps,
                    VisibleToAllUsers=not args.auto_terminate,
                    JobFlowRole=args.ec2_role,
                    ServiceRole=args.service_role,
                    Configurations=ast.literal_eval(args.configurations))
            logging.info("Cluster_id {}".format(result.get('JobFlowId')))
            return result.get('JobFlowId')
    except Exception as err:
        logging.error("Failed to build EMR cluster: " + str(err) +
                      "\n Traceback: " + traceback.format_exc())
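# Hedged usage sketch: build_emr_cluster() returns the EMR JobFlowId (or None on
# failure); a caller could poll the cluster state through the standard boto3 EMR
# API ('args' here is the parsed CLI namespace this script already builds).
# cluster_id = build_emr_cluster(args)
# if cluster_id:
#     state = boto3.client('emr').describe_cluster(
#         ClusterId=cluster_id)['Cluster']['Status']['State']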
    os.environ['aws_user_predefined_s3_policies'] = 'None'

    try:
        if os.environ['conf_user_subnets_range'] == '':
            raise KeyError
    except KeyError:
        os.environ['conf_user_subnets_range'] = ''

    # FUSE in case of absence of user's key
    try:
        project_conf['user_key'] = os.environ['key']
        try:
            subprocess.run('echo "{0}" >> {1}{2}.pub'.format(project_conf['user_key'],
                                                             os.environ['conf_key_dir'],
                                                             project_conf['project_name']),
                           shell=True, check=True)
        except Exception:
            logging.error("ADMIN'S PUBLIC KEY HAS NOT BEEN INSTALLED")
    except KeyError:
        logging.error("ADMIN'S PUBLIC KEY HAS NOT BEEN UPLOADED")
        sys.exit(1)

    logging.info("Will create exploratory environment with edge node as access point as following: {}".format(
        json.dumps(project_conf, sort_keys=True, indent=4, separators=(',', ': '))))
    logging.info(json.dumps(project_conf))

    if 'conf_additional_tags' in os.environ:
        project_conf['bucket_additional_tags'] = ';' + os.environ['conf_additional_tags']
        os.environ['conf_additional_tags'] = os.environ['conf_additional_tags'] + \
            ';project_tag:{0};endpoint_tag:{1};'.format(project_conf['project_tag'],
                                                        project_conf['endpoint_tag'])
    else:
        project_conf['bucket_additional_tags'] = ''
def configure_rstudio():
    if not os.path.exists('/home/' + args.os_user + '/.ensure_dir/rstudio_dataengine-service_ensured'):
        try:
            subprocess.run('echo "export R_LIBS_USER='******'/R/lib:" >> /home/' + args.os_user + '/.bashrc',
                           shell=True, check=True)
            subprocess.run("sed -i 's/^SPARK_HOME/#SPARK_HOME/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run('echo \'SPARK_HOME="' + spark_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'YARN_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'HADOOP_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run("sed -i 's/^master/#master/' /home/" + args.os_user + "/.Rprofile",
                           shell=True, check=True)
            subprocess.run('''R -e "source('/home/{}/.Rprofile')"'''.format(args.os_user),
                           shell=True, check=True)
            # fix emr 5.19 problem with warnings in rstudio because of bug in AWS configuration
            if args.emr_version == "emr-5.19.0":
                subprocess.run("sed -i '/DRFA/s/^/#/' " + spark_dir + "conf/log4j.properties",
                               shell=True, check=True)
            subprocess.run('touch /home/' + args.os_user + '/.ensure_dir/rstudio_dataengine-service_ensured',
                           shell=True, check=True)
        except Exception as err:
            logging.error('Error: {0}'.format(err))
            sys.exit(1)
    else:
        try:
            subprocess.run("sed -i '/R_LIBS_USER/ { s|=\(.*\)|=\\1" + spark_dir + "/R/lib:| }' /home/" +
                           args.os_user + "/.bashrc", shell=True, check=True)
            subprocess.run("sed -i 's/^SPARK_HOME/#SPARK_HOME/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run("sed -i 's/^YARN_CONF_DIR/#YARN_CONF_DIR/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run("sed -i 's/^HADOOP_CONF_DIR/#HADOOP_CONF_DIR/' /home/" + args.os_user + "/.Renviron",
                           shell=True, check=True)
            subprocess.run("sed -i 's/^master/#master/' /home/" + args.os_user + "/.Rprofile",
                           shell=True, check=True)
            subprocess.run('echo \'SPARK_HOME="' + spark_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'YARN_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('echo \'HADOOP_CONF_DIR="' + yarn_dir + '"\' >> /home/' + args.os_user + '/.Renviron',
                           shell=True, check=True)
            subprocess.run('''R -e "source('/home/{}/.Rprofile')"'''.format(args.os_user),
                           shell=True, check=True)
            # fix emr 5.19 problem with warnings in rstudio because of bug in AWS configuration
            if args.emr_version == "emr-5.19.0":
                subprocess.run("sed -i '/DRFA/s/^/#/' " + spark_dir + "conf/log4j.properties",
                               shell=True, check=True)
        except Exception as err:
            logging.error('Error: {0}'.format(err))
            sys.exit(1)
            subnets_cidr.append(subnet.address_prefix)
        sorted_subnets_cidr = sorted(subnets_cidr)
        if not subnets_cidr:
            empty_vpc = True

        last_ip = first_vpc_ip
        for cidr in sorted_subnets_cidr:
            first_ip = int(ipaddress.IPv4Address(cidr.split('/')[0]))
            if first_ip - last_ip < private_subnet_size:
                subnet_size = ipaddress.ip_network(u'{}'.format(cidr)).num_addresses
                last_ip = first_ip + subnet_size - 1
            else:
                break
        if empty_vpc:
            datalab_subnet_cidr = '{0}/{1}'.format(ipaddress.ip_address(last_ip), args.prefix)
        else:
            datalab_subnet_cidr = '{0}/{1}'.format(ipaddress.ip_address(last_ip + 1), args.prefix)
        if args.subnet_name != '':
            if AzureMeta().get_subnet(args.resource_group_name, args.vpc_name, args.subnet_name):
                logging.info("REQUESTED SUBNET {} ALREADY EXISTS".format(args.subnet_name))
            else:
                logging.info("Creating Subnet {}".format(args.subnet_name))
                AzureActions().create_subnet(args.resource_group_name, args.vpc_name,
                                             args.subnet_name, datalab_subnet_cidr)
        else:
            logging.error("Subnet name can't be empty")
            sys.exit(1)
    except Exception as err:
        logging.error('Error: {0}'.format(err))
        sys.exit(1)
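# Worked example of the gap search above (addresses are hypothetical): with
# first_vpc_ip = 10.0.0.0, private_subnet_size = 256 and existing subnets
# 10.0.0.0/24 and 10.0.1.0/24, last_ip advances to 10.0.0.255 and then 10.0.1.255,
# no 256-address gap is found between the existing subnets, and the resulting
# datalab_subnet_cidr is 10.0.2.0/24 (last_ip + 1 with args.prefix = 24).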
            args.source_virtual_network_name, args.destination_virtual_network_name)
        destination_virtual_network_peering_name = '{}_to_{}'.format(
            args.destination_virtual_network_name, args.source_virtual_network_name)
        destination_vnet_id = AzureMeta().get_vpc(
            args.destination_resource_group_name,
            args.destination_virtual_network_name).id
        source_vnet_id = AzureMeta().get_vpc(
            args.source_resource_group_name,
            args.source_virtual_network_name).id
        logging.info("Creating Virtual Network peering {} and {}".format(
            source_virtual_network_peering_name, destination_virtual_network_peering_name))
        AzureActions().create_virtual_network_peerings(
            args.source_resource_group_name, args.source_virtual_network_name,
            source_virtual_network_peering_name, destination_vnet_id)
        AzureActions().create_virtual_network_peerings(
            args.destination_resource_group_name, args.destination_virtual_network_name,
            destination_virtual_network_peering_name, source_vnet_id)
        time.sleep(250)
    except Exception as err:
        logging.error("Error creating vpc peering: " + str(err))
        sys.exit(1)
        json_keycloak_client_id = json.loads(keycloak_get_id_client.text)
        keycloak_id_client = json_keycloak_client_id[0]['id']
        keycloak_client_delete_url = '{0}/admin/realms/{1}/clients/{2}'.format(
            os.environ['keycloak_auth_server_url'],
            os.environ['keycloak_realm_name'],
            keycloak_id_client)
        keycloak_client = requests.delete(
            keycloak_client_delete_url,
            headers={"Authorization": "Bearer " + keycloak_token.get("access_token"),
                     "Content-Type": "application/json"})
    except Exception as err:
        logging.error("Failed to remove project client from Keycloak: {}".format(str(err)))

    try:
        with open("/root/result.json", 'w') as result:
            res = {"service_base_name": project_conf['service_base_name'],
                   "project_name": project_conf['project_name'],
                   "Action": "Terminate project"}
            logging.info(json.dumps(res))
            result.write(json.dumps(res))
    except Exception as err:
        datalab.fab.append_result("Error with writing results", str(err))
        sys.exit(1)
parser.add_argument('--endpoint_name', type=str, default='')
parser.add_argument('--user_predefined_s3_policies', type=str, default='')
args = parser.parse_args()

if __name__ == "__main__":
    if args.bucket_name:
        try:
            handler = open('/root/templates/edge_s3_policy.json', 'r')
            policy = handler.read()
            policy = policy.replace('BUCKET_NAME', args.bucket_name)
            policy = policy.replace('SHARED_BUCK', args.shared_bucket_name)
            if args.region == 'cn-north-1':
                policy = policy.replace('aws', 'aws-cn')
        except OSError:
            logging.error("Failed to open policy template")
            sys.exit(1)
        list_policies_arn = []
        if args.user_predefined_s3_policies != 'None':
            list_predefined_policies = args.user_predefined_s3_policies.split(',')
        try:
            iam = boto3.client('iam')
            try:
                if args.user_predefined_s3_policies != 'None':
                    list = iam.list_policies().get('Policies')
                    for i in list:
                        if i.get('PolicyName') in list_predefined_policies:
                            list_policies_arn.append(i.get('Arn'))
                response = iam.create_policy(PolicyName='{}-{}-{}-strict_to_S3-Policy'.
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# ******************************************************************************

import argparse
import sys
from datalab.actions_lib import *
from datalab.meta_lib import *
from datalab.logger import logging

parser = argparse.ArgumentParser()
parser.add_argument('--vpc_name', type=str, default='')
parser.add_argument('--resource_group_name', type=str, default='')
parser.add_argument('--region', type=str, default='')
parser.add_argument('--vpc_cidr', type=str, default='')
args = parser.parse_args()

if __name__ == "__main__":
    if args.vpc_name != '':
        if AzureMeta().get_vpc(args.resource_group_name, args.vpc_name):
            logging.info("REQUESTED VIRTUAL NETWORK {} EXISTS".format(args.vpc_name))
        else:
            logging.info("Creating Virtual Network {}".format(args.vpc_name))
            AzureActions().create_vpc(args.resource_group_name, args.vpc_name,
                                      args.region, args.vpc_cidr)
    else:
        logging.error("VPC name can't be empty.")
        sys.exit(1)
import argparse
import ast
import json
import sys
import traceback
from datalab.actions_lib import *
from datalab.fab import *
from datalab.meta_lib import *
from datalab.logger import logging
from fabric import *

parser = argparse.ArgumentParser()
parser.add_argument('--resource_group_name', type=str, default='')
parser.add_argument('--list_resources', type=str, default='')
args = parser.parse_args()

if __name__ == "__main__":
    data = ast.literal_eval(args.list_resources.replace('\'', '"'))
    statuses = {}
    try:
        try:
            data_instances = AzureMeta().get_list_instance_statuses(
                args.resource_group_name, data.get('host'))
            statuses['host'] = data_instances
        except Exception:
            logging.error("Hosts JSON wasn't provided")
        with open('/root/result.json', 'w') as outfile:
            json.dump(statuses, outfile)
    except Exception as err:
        traceback.print_exc()
        append_result("Failed getting resources statuses.", str(err))
        sys.exit(1)