def sendConfig(nodeIP, configFile):
    """Copy a config file and the current replica list into one pod.

    The pod is looked up from ``nodeIP``. ``configFile`` (or
    ``BASE_CONFIG_FILE`` when it is falsy) is staged locally under the name
    ``CONFIG_FILE`` and copied into ``POD_CONFIG_DIR`` of the pod's first
    container. The external IPs of all ``role=aft`` nodes are then written,
    one per line, to ``replicas.txt`` and copied into ``aft-container``.

    Both staged files are deleted afterwards, including when the copy into
    the pod raises.

    Args:
        nodeIP: IP address handed to ``util.get_pod_from_ip`` to find the pod.
        configFile: Path of the config file to send; a falsy value selects
            ``BASE_CONFIG_FILE``.

    Raises:
        OSError: If the config file cannot be staged (for example it does
            not exist, or it already is the staging file).
    """
    import shutil

    pod = util.get_pod_from_ip(client, nodeIP)
    pname = pod.metadata.name
    # There is only 1 container in each Pod
    cname = pod.spec.containers[0].name

    cfile = configFile or BASE_CONFIG_FILE

    # Stage the config without going through a shell, so paths containing
    # spaces or shell metacharacters are handled and failures are reported.
    shutil.copy(cfile, CONFIG_FILE)
    try:
        # CONFIG_FILE[2:] drops the first two characters of the staged name
        # (presumably a leading './' -- confirm against the constant).
        util.copy_file_to_pod(client, CONFIG_FILE[2:], pname, POD_CONFIG_DIR,
                              cname)
    finally:
        os.remove(CONFIG_FILE)

    # Sending replica txt
    replica_ips = util.get_node_ips(client, 'role=aft', 'ExternalIP')
    with open('replicas.txt', 'w') as f:
        f.writelines(ip + '\n' for ip in replica_ips)
    try:
        util.copy_file_to_pod(client, 'replicas.txt', pname,
                              '/go/src/github.com/tajshaik24/aft',
                              'aft-container')
    finally:
        os.remove('replicas.txt')
def add_nodes(client, apps_client, cfile, kind, count, aws_key_id=None,
              aws_key=None, create=False, prefix=None, branch="master"):
    """Grow the instance group for ``kind`` and optionally deploy its pods.

    The instance group is always resized to ``count`` plus the previous
    count and the cluster is validated. When ``create`` is true, the
    DaemonSet described by ``yaml/ds/<kind>-ds.yml`` is additionally
    created, the function waits for ``count`` running pods, and the config
    file is copied into every container of every ``role=<kind>`` pod.

    NOTE(review): the nesting of the steps after cluster validation was
    reconstructed from a whitespace-collapsed source; confirm that they are
    all meant to run only when ``create`` is true.

    Args:
        client: Kubernetes core API client.
        apps_client: Kubernetes apps API client used to create the DaemonSet.
        cfile: Local path of the config file to push into the pods.
        kind: Node role; used for the instance group name, the DaemonSet
            yaml file name and the ``role=<kind>`` label selector.
        count: Number of nodes to add.
        aws_key_id: Value substituted for ``AWS_ACCESS_KEY_ID``.
        aws_key: Value substituted for ``AWS_SECRET_ACCESS_KEY``.
        create: Whether to create the DaemonSet and push configs.
        prefix: Passed through to ``util.load_yaml``.
        branch: Value substituted for ``BRANCH``.

    Raises:
        OSError: If ``cfile`` cannot be staged locally.
    """
    import shutil

    print('Adding %d %s server node(s) to cluster...' % (count, kind))

    prev_count = util.get_previous_count(client, kind)
    util.run_process(['./modify_ig.sh', kind, str(count + prev_count)], 'kops')
    util.run_process(['./validate_cluster.sh'], 'kops')

    if not create:
        return

    fname = 'yaml/ds/%s-ds.yml' % kind
    yml = util.load_yaml(fname, prefix)

    for container in yml['spec']['template']['spec']['containers']:
        env = container['env']
        util.replace_yaml_val(env, 'BRANCH', branch)
        util.replace_yaml_val(env, 'AWS_ACCESS_KEY_ID', aws_key_id)
        util.replace_yaml_val(env, 'AWS_SECRET_ACCESS_KEY', aws_key)

        # Role-specific addresses. The lookups stay inside the loop and in
        # their original order so the sequence of util calls is unchanged.
        if kind == "tasc":
            routing_svc = util.get_service_address(client, 'routing-service')
            util.replace_yaml_val(env, 'ROUTING_ILB', routing_svc)
            monitor_ip = util.get_node_ips(client, 'role=monitor',
                                           'ExternalIP')[0]
            util.replace_yaml_val(env, 'MONITOR', monitor_ip)
            worker_svc = util.get_service_address(client, 'worker-service')
            util.replace_yaml_val(env, 'WORKER_ILB', worker_svc)
        elif kind == "keynode":
            monitor_ip = util.get_node_ips(client, 'role=monitor',
                                           'ExternalIP')[0]
            util.replace_yaml_val(env, 'MONITOR', monitor_ip)
        elif kind == 'worker':
            monitor_ip = util.get_node_ips(client, 'role=monitor',
                                           'ExternalIP')[0]
            util.replace_yaml_val(env, 'MONITOR', monitor_ip)
            routing_svc = util.get_service_address(client, 'routing-service')
            util.replace_yaml_val(env, 'ROUTING_ILB', routing_svc)

    apps_client.create_namespaced_daemon_set(namespace=util.NAMESPACE,
                                             body=yml)

    # Wait until all pods of this kind are running.
    # NOTE(review): this loop has no delay of its own between polls.
    res = []
    while len(res) != count:
        res = util.get_pod_ips(client, 'role=' + kind, is_running=True)

    pods = client.list_namespaced_pod(namespace=util.NAMESPACE,
                                      label_selector='role=' + kind).items

    # Send kube config to lb
    if kind == 'lb':
        kubecfg = os.path.join(os.environ['HOME'], '.kube/config')
        for pod in pods:
            cname = pod.spec.containers[0].name
            util.copy_file_to_pod(client, kubecfg, pod.metadata.name,
                                  '/root/.kube', cname)

    # Every (pod name, container name) pair of this kind.
    created_pods = [
        (pod.metadata.name, container.name)
        for pod in pods
        for container in pod.spec.containers
    ]

    # Copy the config into all of those containers. Routing pods take the
    # Anna config; every other kind takes the TASC config.
    if kind != 'routing':
        cfile_name = './tasc-config.yml'
        cfile_dir = '/go/src/github.com/saurav-c/tasc/config'
    else:
        cfile_name = './anna-config.yml'
        cfile_dir = 'hydro/anna/conf'

    # Stage under the expected file name without a shell, and always clean
    # the staged copy up, even if a pod copy fails.
    shutil.copy(cfile, cfile_name)
    try:
        for pname, cname in created_pods:
            # cfile_name[2:] strips the leading './'.
            util.copy_file_to_pod(client, cfile_name[2:], pname, cfile_dir,
                                  cname)
    finally:
        os.remove(cfile_name)
def add_nodes(client, apps_client, cfile, kinds, counts, management_ip,
              aws_key_id=None, aws_key=None, create=False, prefix=None):
    """Grow the instance groups for ``kinds`` and optionally deploy pods.

    For each kind, the instance group is resized to its entry in ``counts``
    plus the previous count; the cluster is then validated once. When
    ``create`` is true, each kind's DaemonSet (``yaml/ds/<kind>-ds.yml``) is
    created, the function waits for the requested number of running pods,
    labels every ``role=<kind>`` pod with ``aftReady=isready`` and copies
    the config file into every container of those pods.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; confirm that cluster validation runs once after the resize loop
    and that pods are patched once per pod.

    Args:
        client: Kubernetes core API client.
        apps_client: Kubernetes apps API client used to create DaemonSets.
        cfile: Local path of the config file to push into the pods.
        kinds: Sequence of node roles.
        counts: Sequence of node counts, parallel to ``kinds``.
        management_ip: Value substituted for ``MANAGER``.
        aws_key_id: Value substituted for ``AWS_ACCESS_KEY_ID``.
        aws_key: Value substituted for ``AWS_SECRET_ACCESS_KEY``.
        create: Whether to create the DaemonSets and push configs.
        prefix: Passed through to ``util.load_yaml``.

    Raises:
        ValueError: If ``counts`` has fewer entries than ``kinds``.
        OSError: If ``cfile`` cannot be staged locally.
    """
    import shutil

    # Fail before touching the cluster rather than part-way through the
    # resize loop. Extra trailing counts are tolerated, as before.
    if len(counts) < len(kinds):
        raise ValueError(
            'counts must provide one entry per kind (got %d kinds, %d counts)'
            % (len(kinds), len(counts)))

    for kind, count in zip(kinds, counts):
        print('Adding %d %s server node(s) to cluster...' % (count, kind))

        prev_count = util.get_previous_count(client, kind)
        util.run_process(['./modify_ig.sh', kind, str(count + prev_count)])

    util.run_process(['./validate_cluster.sh'])

    replica_str = ' '.join(util.get_node_ips(client, 'role=aft'))

    # Create should only be true when the DaemonSet is being created for the
    # first time -- i.e., when this is called from create_cluster. After that,
    # we can basically ignore this because the DaemonSet will take care of
    # adding pods to created nodes.
    if not create:
        return

    for kind, count in zip(kinds, counts):
        fname = 'yaml/ds/%s-ds.yml' % kind
        yml = util.load_yaml(fname, prefix)

        for container in yml['spec']['template']['spec']['containers']:
            env = container['env']
            util.replace_yaml_val(env, 'REPLICA_IPS', replica_str)
            util.replace_yaml_val(env, 'MANAGER', management_ip)
            util.replace_yaml_val(env, 'AWS_ACCESS_KEY_ID', aws_key_id)
            util.replace_yaml_val(env, 'AWS_SECRET_ACCESS_KEY', aws_key)

        apps_client.create_namespaced_daemon_set(namespace=util.NAMESPACE,
                                                 body=yml)

        # Wait until all pods of this kind are running.
        # NOTE(review): this loop has no delay of its own between polls.
        res = []
        while len(res) != count:
            res = util.get_pod_ips(client, 'role=' + kind, is_running=True)

        pods = client.list_namespaced_pod(namespace=util.NAMESPACE,
                                          label_selector='role=' + kind).items

        # Collect every (pod name, container name) pair of this kind and
        # label each pod as ready.
        created_pods = []
        for pod in pods:
            pname = pod.metadata.name
            created_pods.extend(
                (pname, container.name) for container in pod.spec.containers)

            pod.metadata.labels['aftReady'] = 'isready'
            client.patch_namespaced_pod(pname, util.NAMESPACE, pod)

        # Copy the config into all of those containers. It is staged under
        # the expected file name without a shell, and the staged copy is
        # always removed, even if a pod copy fails.
        shutil.copy(cfile, './aft-config.yml')
        try:
            for pname, cname in created_pods:
                util.copy_file_to_pod(
                    client, 'aft-config.yml', pname,
                    '/go/src/github.com/Alchem-Lab/aft/config', cname)
        finally:
            os.remove('./aft-config.yml')
def register(client, ips):
    """Call ``join_hash_ring`` once per address in ``ips``.

    The external IPs of the ``role=routing`` nodes are fetched once and
    passed to every call; each address in ``ips`` is supplied as both the
    second and the third argument.

    Args:
        client: Kubernetes core API client.
        ips: Iterable of addresses to register.
    """
    routing_ips = util.get_node_ips(client, selector='role=routing',
                                    tp='ExternalIP')

    for node_ip in ips:
        join_hash_ring(routing_ips, node_ip, node_ip)
def create_cluster(replica_count, gc_count, lb_count, bench_count, cfile,
                   ssh_key, cluster_name, kops_bucket, aws_key_id, aws_key):
    """Create the cluster: standby, LB, GC, Aft and benchmark nodes.

    Steps, in order: create the cluster object, add one standby node, add
    the load balancer and GC nodes, copy the kube config to the LB pods,
    add the Aft replicas, distribute ``replicas.txt`` to all Aft and GC
    pods, add the benchmark nodes, create the Aft service and open the Aft
    port ranges in the ``nodes.<cluster_name>`` security group.

    Creation of the management pod is currently disabled (the code is
    commented out), so ``management_ip`` is empty and the step that copies
    files into the management pod is skipped. It previously raised
    ``NameError`` because ``management_spec`` was never assigned.

    Args:
        replica_count: Number of Aft replicas.
        gc_count: Number of GC replicas.
        lb_count: Number of load balancers.
        bench_count: Number of benchmark nodes.
        cfile: Local path of the config file.
        ssh_key: Passed to ``create_cluster_object.sh``.
        cluster_name: Used to derive the security group name.
        kops_bucket: Passed to ``create_cluster_object.sh``.
        aws_key_id: AWS access key id forwarded to ``add_nodes``.
        aws_key: AWS secret access key forwarded to ``add_nodes``.
    """
    import shutil

    prefix = './'
    util.run_process(['./create_cluster_object.sh', kops_bucket, ssh_key])

    client, apps_client = util.init_k8s()

    print('Creating management pod')
    # Management pod creation is disabled. Re-enable the block below (and
    # drop the two placeholder assignments) to restore it.
    # management_spec = util.load_yaml('yaml/pods/management-pod.yml')
    # env = management_spec['spec']['containers'][0]['env']
    # util.replace_yaml_val(env, 'AWS_ACCESS_KEY_ID', aws_key_id)
    # util.replace_yaml_val(env, 'AWS_SECRET_ACCESS_KEY', aws_key)
    #
    # client.create_namespaced_pod(namespace=util.NAMESPACE,
    #                              body=management_spec)
    # management_ip = util.get_pod_ips(client, 'role=management',
    #                                 is_running=True)[0]
    management_spec = None
    management_ip = ""

    print('Creating standby replicas...')
    util.run_process(['./modify_ig.sh', 'standby', '1'])
    util.run_process(['./validate_cluster.sh'])

    print('Creating %d load balancer, %d GC replicas...' % (lb_count,
                                                            gc_count))
    add_nodes(client, apps_client, cfile, ['lb', 'gc'], [lb_count, gc_count],
              management_ip, aws_key_id, aws_key, True, prefix)

    lb_pods = client.list_namespaced_pod(namespace=util.NAMESPACE,
                                         label_selector="role=lb").items
    kubecfg = os.path.join(os.environ['HOME'], '.kube/config')
    for pod in lb_pods:
        util.copy_file_to_pod(client, kubecfg, pod.metadata.name,
                              '/root/.kube', 'lb-container')

    gc_ips = util.get_node_ips(client, 'role=gc', 'ExternalIP')
    with open('gcs.txt', 'w') as f:
        f.writelines(ip + '\n' for ip in gc_ips)

    print('Creating %d Aft replicas...' % (replica_count))
    add_nodes(client, apps_client, cfile, ['aft'], [replica_count],
              management_ip, aws_key_id, aws_key, True, prefix)
    # Return value is unused; the call is kept in case it has an effect
    # inside util -- confirm before removing.
    util.get_pod_ips(client, 'role=aft')

    replica_ips = util.get_node_ips(client, 'role=aft', 'ExternalIP')
    with open('replicas.txt', 'w') as f:
        f.writelines(ip + '\n' for ip in replica_ips)

    # Push the config, both IP lists and the kube config to the management
    # pod. Skipped while no management pod is created.
    if management_spec is not None:
        management_pname = management_spec['metadata']['name']
        management_cname = management_spec['spec']['containers'][0]['name']

        shutil.copy(cfile, 'aft-config.yml')
        try:
            util.copy_file_to_pod(client, 'aft-config.yml', management_pname,
                                  '/go/src/github.com/tajshaik24/aft/config',
                                  management_cname)
            util.copy_file_to_pod(client, 'replicas.txt', management_pname,
                                  '/go/src/github.com/tajshaik24/aft',
                                  management_cname)
            util.copy_file_to_pod(client, 'gcs.txt', management_pname,
                                  '/go/src/github.com/tajshaik24/aft',
                                  management_cname)
            util.copy_file_to_pod(client, kubecfg, management_pname,
                                  '/root/.kube/', management_cname)
        finally:
            os.remove('aft-config.yml')
    os.remove('gcs.txt')

    # Copy replicas.txt to all Aft pods.
    aft_pods = client.list_namespaced_pod(namespace=util.NAMESPACE,
                                          label_selector="role=aft").items
    for pname in [pod.metadata.name for pod in aft_pods]:
        util.copy_file_to_pod(client, 'replicas.txt', pname,
                              '/go/src/github.com/tajshaik24/aft',
                              'aft-container')

    # Copy replicas.txt to all GC pods.
    gc_pods = client.list_namespaced_pod(namespace=util.NAMESPACE,
                                         label_selector="role=gc").items
    for pname in [pod.metadata.name for pod in gc_pods]:
        util.copy_file_to_pod(client, 'replicas.txt', pname,
                              '/go/src/github.com/tajshaik24/aft',
                              'gc-container')
    os.remove('replicas.txt')

    print('Adding %d benchmark nodes...' % (bench_count))
    add_nodes(client, apps_client, cfile, ['benchmark'], [bench_count],
              management_ip, aws_key_id, aws_key, True, prefix)

    print('Finished creating all pods...')

    print('Creating Aft service...')
    service_spec = util.load_yaml('yaml/services/aft.yml', prefix)
    client.create_namespaced_service(namespace=util.NAMESPACE,
                                     body=service_spec)

    sg_name = 'nodes.' + cluster_name
    sg = ec2_client.describe_security_groups(Filters=[{
        'Name': 'group-name',
        'Values': [sg_name]
    }])['SecurityGroups'][0]

    print('Authorizing ports for Aft replicas...')
    # TCP ranges 7654-7656, 7777-7782 and 8000-8003, open to any source.
    permission = [{
        'FromPort': 7654,
        'IpProtocol': 'tcp',
        'ToPort': 7656,
        'IpRanges': [{
            'CidrIp': '0.0.0.0/0'
        }]
    }, {
        'FromPort': 7777,
        'IpProtocol': 'tcp',
        'ToPort': 7782,
        'IpRanges': [{
            'CidrIp': '0.0.0.0/0'
        }]
    }, {
        'FromPort': 8000,
        'IpProtocol': 'tcp',
        'ToPort': 8003,
        'IpRanges': [{
            'CidrIp': '0.0.0.0/0'
        }]
    }]

    ec2_client.authorize_security_group_ingress(GroupId=sg['GroupId'],
                                                IpPermissions=permission)
    print('Finished!')
def create_cluster(txn_count, keynode_count, rtr_count, worker_count, lb_count,
                   benchmark_count, config_file, branch_name, ssh_key,
                   cluster_name, kops_bucket, aws_key_id, aws_key,
                   anna_config_file):
    """Create the TASC cluster and print its load balancer endpoint.

    Node groups are created in this order: monitor, routing, keynode,
    worker, tasc, lb, benchmark. The routing, worker and TASC services are
    created along the way. Afterwards the benchmark node IPs are written to
    ``../cmd/benchmark/benchmarks.txt``, the ``nodes.<cluster_name>``
    security group is opened, and the running keynode pods are passed to
    ``register``.

    Args:
        txn_count: Number of TASC nodes.
        keynode_count: Number of key nodes.
        rtr_count: Number of Anna routing nodes.
        worker_count: Number of worker nodes.
        lb_count: Number of load balancers.
        benchmark_count: Number of benchmark nodes.
        config_file: Config file pushed to every kind except routing.
        branch_name: Branch forwarded to ``add_nodes``.
        ssh_key: Passed to ``create_cluster_object.sh``.
        cluster_name: Used to derive the security group name.
        kops_bucket: Passed to ``create_cluster_object.sh``.
        aws_key_id: AWS access key id forwarded to ``add_nodes``.
        aws_key: AWS secret access key forwarded to ``add_nodes``.
        anna_config_file: Config file pushed to the routing nodes.
    """
    prefix = './'
    util.run_process(['./create_cluster_object.sh', kops_bucket, ssh_key],
                     'kops')

    client, apps_client = util.init_k8s()

    def launch(kind, node_count, node_config=config_file):
        # Create `node_count` nodes of `kind` together with their DaemonSet.
        add_nodes(client, apps_client, node_config, kind, node_count,
                  aws_key_id, aws_key, True, prefix, branch_name)

    def create_service(spec_path):
        # Load a service spec and create it in the cluster namespace.
        spec = util.load_yaml(spec_path, prefix)
        client.create_namespaced_service(namespace=util.NAMESPACE, body=spec)

    print('Creating Monitor Node...')
    launch("monitor", 1)

    print('Creating %d Anna Routing Nodes...' % (rtr_count))
    launch("routing", rtr_count, anna_config_file)

    print('Creating routing service...')
    create_service('yaml/services/routing.yml')
    # Result is discarded; the call is made for whatever it does in util.
    util.get_service_address(client, 'routing-service')

    print('Creating %d Key Nodes...' % (keynode_count))
    launch("keynode", keynode_count)

    print('Creating %d Worker Nodes...' % (worker_count))
    launch("worker", worker_count)

    print('Creating Worker Service...')
    create_service('yaml/services/worker.yml')
    # Result is discarded here as well.
    util.get_service_address(client, 'worker-service')

    print('Creating %d TASC nodes...' % (txn_count))
    launch('tasc', txn_count)

    print('Creating %d Load Balancers...' % (lb_count))
    launch('lb', lb_count)

    print('Creating TASC Load Balancing service...')
    create_service('yaml/services/tasc.yml')

    print('Creating %d Benchmark nodes...' % (benchmark_count))
    launch('benchmark', benchmark_count)

    # Record the benchmark node addresses, one per line.
    benchmark_ips = util.get_node_ips(client, 'role=benchmark', 'ExternalIP')
    with open('../cmd/benchmark/benchmarks.txt', 'w+') as out:
        out.writelines(address + '\n' for address in benchmark_ips)

    print('Finished creating all pods...')

    group_filter = {'Name': 'group-name', 'Values': ['nodes.' + cluster_name]}
    sg = ec2_client.describe_security_groups(
        Filters=[group_filter])['SecurityGroups'][0]

    print("Authorizing Ports for TASC...")
    # NOTE(review): this rule opens every TCP port (0-65535) to any source
    # address (0.0.0.0/0).
    all_tcp_rule = {
        'FromPort': 0,
        'IpProtocol': 'tcp',
        'ToPort': 65535,
        'IpRanges': [{'CidrIp': '0.0.0.0/0'}],
    }
    ec2_client.authorize_security_group_ingress(GroupId=sg['GroupId'],
                                                IpPermissions=[all_tcp_rule])

    print('Registering Key Nodes...')
    register(client,
             util.get_pod_ips(client, 'role=keynode', is_running=True))

    endpoint = util.get_service_address(client, "tasc-service")
    print("\nThe TASC ELB Endpoint: " + endpoint + "\n")
    print('Finished!')