def haproxy_enable(nodes):
    """Run ``d40.haproxy.sh enable`` on ``nodes`` and log the outcome.

    ``patt.host_id`` and ``patt.check_dup_id`` are called on ``nodes``
    before the script is executed with ``sudo=True``.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    script_args = ['enable']
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d40.haproxy.sh",
        args=script_args,
        sudo=True,
    )
    log_results(outcome)
def floating_ip_enable(nodes, floating_ips):
    """Run ``d25.floating_ip.sh enable`` on ``nodes`` and log the outcome.

    Args:
        nodes: nodes the script is executed on (with ``sudo=True``).
        floating_ips: iterable of strings; passed to the script as one
            space-separated argument.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    # The script receives all addresses as a single argument.
    joined_ips = " ".join(floating_ips)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d25.floating_ip.sh",
        args=['enable', joined_ips],
        sudo=True,
    )
    log_results(outcome)
def walg_init(walg_version, nodes):
    """Run ``d27.walg.sh init <walg_version>`` on ``nodes`` (``sudo=False``).

    Returns:
        True when every result entry has a truthy ``out`` attribute
        (also True for an empty result), False otherwise.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=['init', walg_version],
        sudo=False,
    )
    log_results(outcome)
    # Collect every flag first so each entry's ``out`` is inspected.
    produced_output = [bool(entry.out) for entry in outcome]
    return all(produced_output)
def postgres_ssl_cert_init(nodes):
    """Run ``ssl_cert_postgres.sh init`` on ``nodes`` and log the outcome.

    The script is executed with ``sudo=True`` after ``patt.host_id`` and
    ``patt.check_dup_id`` have been called on ``nodes``.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="dscripts/ssl_cert_postgres.sh",
        args=['init'],
        sudo=True,
    )
    log_results(outcome)
def postgres_init(postgres_version, nodes):
    """Run ``d20.postgres.sh init <postgres_version>`` on ``nodes``.

    The script is executed with ``sudo=True`` and its results are handed
    to ``log_results``.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d20.postgres.sh",
        args=['init', postgres_version],
        sudo=True,
    )
    log_results(outcome)
def floating_ip_init(nodes, ip_takeover_version="0.9"):
    """Run ``d25.floating_ip.sh init <ip_takeover_version>`` on ``nodes``.

    The script is executed with ``sudo=True`` and ``timeout=1440``; the
    results are handed to ``log_results``.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d25.floating_ip.sh",
        args=['init', ip_takeover_version],
        sudo=True,
        timeout=1440,
    )
    log_results(outcome)
def patroni_init(postgres_version, patroni_version, nodes):
    """Run ``d30.patroni.sh init`` on ``nodes`` with the patroni.te payload.

    The script receives ``postgres_version``, ``patroni_version`` and the
    payload file name ``patroni.te`` as arguments and runs with
    ``sudo=True``.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    script_args = ['init', postgres_version, patroni_version, 'patroni.te']
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d30.patroni.sh",
        payload='config/patroni.te',
        args=script_args,
        sudo=True,
    )
    log_results(outcome)
def floating_ip_build(nodes, ip_takeover_version="0.9"):
    """Run ``d25.floating_ip.sh build <ip_takeover_version>`` on ``nodes``.

    ``ip_takeover.py`` and ``ip_takeover.make`` are shipped as payload;
    the script runs with ``sudo=False``.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    build_payload = ["./ip_takeover.py", "./ip_takeover.make"]
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d25.floating_ip.sh",
        payload=build_payload,
        args=['build', ip_takeover_version],
        sudo=False,
    )
    log_results(outcome)
def walg_ssh_known_hosts(cluster_name, nodes, archiving_server):
    """Run ``d27.walg.sh ssh_known_hosts`` on ``nodes`` (``sudo=True``).

    The script receives ``cluster_name`` and the hostname of the first
    entry of ``archiving_server``.

    Returns:
        True when no result entry that has an ``error`` attribute carries
        a truthy value there, False otherwise.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=['ssh_known_hosts', cluster_name, archiving_server[0].hostname],
        sudo=True,
    )
    log_results(outcome)
    # Entries without an ``error`` attribute are ignored.
    error_flags = [bool(entry.error) for entry in outcome
                   if hasattr(entry, 'error')]
    return not any(error_flags)
def walg_ssh_gen(cluster_name, nodes, postgres_user='******'):
    """Run ``d27.walg.sh ssh_archive_keygen`` on ``nodes`` and return outputs.

    The script receives ``cluster_name`` and ``postgres_user`` and runs
    with ``sudo=True`` and ``log_call=True``; results are logged with
    ``hide_stdout=True``.

    Returns:
        A list with the ``out`` attribute of every result entry.

    Raises:
        AssertionError: if any result entry has a falsy ``out``.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d27.walg.sh",
        args=['ssh_archive_keygen', cluster_name, postgres_user],
        sudo=True,
        log_call=True,
    )
    log_results(outcome, hide_stdout=True)
    has_output = [bool(entry.out) for entry in outcome]
    assert all(has_output)
    return [entry.out for entry in outcome]
def tuned_postgresql(nodes):
    """Run ``d22_tuned.sh enable`` once per distinct hostname in ``nodes``.

    Nodes sharing a hostname are collapsed to one entry (the last one
    seen) before the script is executed with ``sudo=True``.
    """
    by_hostname = {}
    for node in nodes:
        by_hostname[node.hostname] = node
    nodes = list(by_hostname.values())
    logger.debug("tuned postgresql {}".format(nodes))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d22_tuned.sh",
        args=['enable'],
        sudo=True,
    )
    log_results(outcome)
def nft_init(nodes):
    """Run ``d01.nft.sh init`` once per distinct hostname in ``nodes``.

    Nodes sharing a hostname are collapsed to one entry (the last one
    seen) before the script is executed with ``sudo=True``.
    """
    by_hostname = {}
    for node in nodes:
        by_hostname[node.hostname] = node
    nodes = list(by_hostname.values())
    hostnames = [node.hostname for node in nodes]
    logger.debug("nft_init {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d01.nft.sh",
        args=['init'],
        sudo=True,
    )
    log_results(outcome)
def add_repo(repo_url, nodes):
    """Run ``d03.repo.sh add`` once per distinct hostname in ``nodes``.

    Args:
        repo_url: iterable of strings; passed to the script as one
            space-separated argument.
        nodes: target nodes; entries sharing a hostname are collapsed to
            one (the last one seen).
    """
    by_hostname = {}
    for node in nodes:
        by_hostname[node.hostname] = node
    nodes = list(by_hostname.values())
    logger.debug("add repo url {}".format(nodes))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    joined_urls = " ".join(repo_url)
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/d03.repo.sh",
        args=['add', joined_urls],
        sudo=True,
    )
    log_results(outcome)
def postgres_wait_ready(postgres_peers, postgres_version, timeout=120):
    """Run ``pg_wait_ready.sh wait_pg_isready`` on ``postgres_peers``.

    ``postgres_version`` and ``timeout`` are forwarded as script
    arguments (``timeout`` is not passed to ``patt.exec_script`` itself).

    Returns:
        True when at least one result entry has a truthy ``out``;
        False otherwise, including for an empty result.
    """
    hostnames = [peer.hostname for peer in postgres_peers]
    logger.info("processing {}".format(hostnames))
    patt.host_id(postgres_peers)
    patt.check_dup_id(postgres_peers)
    outcome = patt.exec_script(
        nodes=postgres_peers,
        src="./dscripts/pg_wait_ready.sh",
        args=['wait_pg_isready', postgres_version, timeout],
        sudo=True,
    )
    log_results(outcome)
    produced_output = [bool(entry.out) for entry in outcome]
    return any(produced_output)
def disk_init(nodes, vol_size, mnt=None, user=None):
    """Run ``data_vol.py`` on each distinct host to set up a data volume.

    Args:
        nodes: target nodes; entries sharing a hostname are collapsed to
            one (the last one seen).
        vol_size: value passed to the script's ``-s`` option.
        mnt: value for the script's ``-m`` option. Takes precedence over
            ``user`` when both are truthy.
        user: value for the script's ``-u`` option, used when ``mnt`` is
            falsy.

    Raises:
        ValueError: if neither ``mnt`` nor ``user`` is given. Previously
            this case ran the remote preparation steps and then failed
            with an UnboundLocalError.
    """
    # Validate before touching any remote host.
    if not mnt and not user:
        raise ValueError("disk_init requires either 'mnt' or 'user'")

    nodes = list({n.hostname: n for n in nodes}.values())
    logger.debug("disk init {}".format(nodes))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)
    util_init(nodes)

    target_args = ['-m', mnt] if mnt else ['-u', user]
    result = patt.exec_script(nodes=nodes, src="./dscripts/data_vol.py",
                              args=target_args + ['-s', vol_size],
                              sudo=True)
    log_results(result)
def patroni_enable(postgres_version, patroni_version, nodes):
    """Enable patroni on ``nodes`` and return the check output of one node.

    ``d30.patroni.sh enable`` is run on every node with ``sudo=True``;
    afterwards ``d30.patroni.sh check`` is run on one randomly chosen
    node.

    Returns:
        ``"\\n" + out`` of the first check result, or None when the check
        produced no result entries.

    Raises:
        IndexError: from ``random.choice`` when ``nodes`` is empty.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    result = patt.exec_script(nodes=nodes, src="./dscripts/d30.patroni.sh",
                              args=['enable', postgres_version,
                                    patroni_version],
                              sudo=True)
    log_results(result)

    random_node = [random.choice(nodes)]
    result = patt.exec_script(nodes=random_node,
                              src="./dscripts/d30.patroni.sh",
                              args=['check'], sudo=True)
    for r in result:
        logger.warning("hostname: {}".format(r.hostname))
        # The error used to be logged after the return statement and was
        # therefore never reported; log it before returning.
        error = getattr(r, 'error', None)
        if error:
            logger.warning("error: {}".format(error))
        return "\n{}".format(r.out)
    return None
def postgres_gc_cron(nodes, vaccum_full_df_percent, target, postgres_version):
    """Render ``postgres-gc.sh.tmpl`` to ``target`` on ``nodes``.

    ``tmpl2file.py`` is run with ``sudo=True``, the template as payload,
    ``--chmod 755`` and three ``--dictionary_key_val`` entries: ``pc``,
    ``vacuumdb_option`` and ``postgres_version``.

    ``vacuumdb_option`` is ``--skip-locked`` when ``postgres_version`` is
    at least 12, otherwise empty.
    NOTE(review): the ``>= 12`` comparison needs a numeric
    ``postgres_version``; confirm callers never pass a string.
    """
    hostnames = [node.hostname for node in nodes]
    logger.info("processing {}".format(hostnames))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    tmpl = "./config/postgres-gc.sh.tmpl"
    vacuumdb_option = "--skip-locked" if postgres_version >= 12 else ""

    script_args = [
        '-t', os.path.basename(tmpl),
        '-o', target,
        '--chmod', '755',
        '--dictionary_key_val', "pc={}".format(vaccum_full_df_percent),
        '--dictionary_key_val', "vacuumdb_option={}".format(vacuumdb_option),
        '--dictionary_key_val', "postgres_version={}".format(postgres_version),
    ]
    outcome = patt.exec_script(
        nodes=nodes,
        src="./dscripts/tmpl2file.py",
        payload=tmpl,
        args=script_args,
        sudo=True,
    )
    log_results(outcome)
def postgres_ssl_cert(cluster_name, postgres_user='******', nodes=[], keep_ca=True):
    """Distribute the cluster root CA to ``nodes`` and create node certs.

    For each of ``root.key`` and ``root.crt`` the file is fetched with
    ``postgres_get_cert``. If that fails, the CA is generated on the
    first node (sorted by hostname) via ``ssl_cert_postgres.py`` and the
    fetch is retried, up to 5 attempts with a 3 second pause. The file is
    then pushed to every node with ``ssl_cert_postgres.sh copy_ca``.
    Finally ``ssl_cert_postgres.py`` is run on all nodes without the CA
    options.

    Args:
        cluster_name: passed to the script as ``-c`` and used in the CA
            common name and in the local CA copy's file name.
        postgres_user: passed to the script as ``-u``.
        nodes: cluster nodes; hostnames and ip aliases are passed as
            ``-p``.
        keep_ca: when true and ``whoami`` reports no running node, a copy
            of the CA files is kept under ``~/.patt/ca``.

    Raises:
        AssertionError: if a CA file could not be obtained after all
            attempts.
    """
    ssl_script = "misc/self_signed_certificate.py"
    source = patt.Source()
    logger.info("processing {}".format([n.hostname for n in nodes]))
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    # If run via puppet the cert is installed on the running agent but not
    # on the other nodes before the installer runs, so retrieve and
    # distribute the cert to all nodes.
    running_node = source.whoami(nodes)
    ca_provider = nodes

    # Always bound: stays None when installing from a peer or when
    # keep_ca is false (the latter used to raise UnboundLocalError below).
    self_ca_dir = None
    if not running_node and keep_ca:
        # not installing from peer: keep a local copy of the CA
        self_ca_dir = os.path.expanduser("~") + '/' + '.patt/ca'
        Path(self_ca_dir).mkdir(parents=True, exist_ok=True, mode=0o700)

    ca_options = [
        '--ca_country_name', "'UK'",
        '--ca_state_or_province_name', "'United Kingdom'",
        '--ca_locality_name', "'Cambridge'",
        '--ca_organization_name', "'Patroni Postgres Cluster'",
        '--ca_common_name', "'CA {}'".format(cluster_name),
        '--ca_not_valid_after', "'3650'",
    ]

    def cert_script_args(extra):
        # Built lazily so node attributes are only read when a call is made.
        return (['-c', cluster_name,
                 '-s', os.path.basename(ssl_script),
                 '-u', postgres_user]
                + extra
                + ['-p'] + [p.hostname for p in nodes]
                + [" ".join(p.ip_aliases) for p in nodes])

    for i in ['root.key', 'root.crt']:
        tmp = None
        for _attempt in range(5):
            try:
                tmp = postgres_get_cert(q=i, postgres_user=postgres_user,
                                        nodes=ca_provider)
                # Explicit check (not assert) so the retry logic is
                # unaffected by ``python -O``.
                if not isinstance(tmp, (str, bytes)):
                    raise TypeError("no {} returned".format(i))
            except Exception as exc:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                logger.debug("{} not available: {}".format(i, exc))
                # generate CA on first node and retry
                first_node = [sorted(nodes, key=lambda n: n.hostname)[0]]
                result = patt.exec_script(
                    nodes=first_node,
                    src="dscripts/ssl_cert_postgres.py",
                    payload=ssl_script,
                    args=cert_script_args(ca_options),
                    sudo=True)
                log_results(result)
                tmp = postgres_get_cert(q=i, postgres_user=postgres_user,
                                        nodes=first_node)
                if isinstance(tmp, (str, bytes)):
                    break
                time.sleep(3)
            else:
                break

        if not isinstance(tmp, (str, bytes)):
            # Same exception type as the original assert.
            raise AssertionError("unable to retrieve {}".format(i))

        # The file is opened in text mode, so bytes must be decoded first.
        cert_text = tmp.decode() if isinstance(tmp, bytes) else tmp

        with tempfile.TemporaryDirectory() as tmp_dir:
            cert_path = tmp_dir + '/' + i
            with open(cert_path, "w") as cf:
                cf.write(cert_text)
                cf.write('\n')
            os.chmod(cert_path, 0o640)

            if self_ca_dir and os.path.isdir(self_ca_dir):
                t = (self_ca_dir + '/' + cluster_name + '-'
                     + os.path.basename(cert_path))
                # Never overwrite a CA file that was kept earlier.
                if not os.path.isfile(t):
                    shutil.copy2(cert_path, t)

            # Must run while the temporary directory still exists.
            result = patt.exec_script(
                nodes=nodes,
                src="dscripts/ssl_cert_postgres.sh",
                payload=cert_path,
                args=['copy_ca', os.path.basename(cert_path), i],
                sudo=True)
            log_results(result, hide_stdout=True)

    result = patt.exec_script(
        nodes=nodes,
        src="dscripts/ssl_cert_postgres.py",
        payload=ssl_script,
        args=cert_script_args([]),
        sudo=True)
    log_results(result)
else: haproxy_peers=nodes walg_ssh_destination = None if cfg.walg_ssh_destination: walg_ssh_destination = patt.to_nodes ([cfg.walg_ssh_destination], ssh_login, cfg.ssh_keyfile) progress_bar (1, 14) # Peer check for p in [etcd_peers, postgres_peers, haproxy_peers]: for n in patt.check_priv(p): assert (n.sudo == True) patt.host_id(p) patt.host_ip_aliases(p) patt.check_dup_id ([p for p in etcd_peers]) patt.check_dup_id ([p for p in postgres_peers]) patt.check_dup_id ([p for p in haproxy_peers]) logger.info ("cluster name : {}".format(cfg.cluster_name)) logger.info ("cluster nodes : {}".format([(n.hostname, n.ip_aliases) for n in nodes])) logger.info ("etcd_peers : {}".format([(n.hostname, n.id, n.ip_aliases) for n in etcd_peers])) logger.info ("postgres_peers : {}".format( [(n.hostname, n.id, n.ip_aliases) for n in postgres_peers])) if cfg.haproxy_template_file: logger.info ("haproxy_peers : {}".format([(n.hostname, n.id) for n in haproxy_peers])) progress_bar (2, 14) if cfg.add_repo: patt_syst.add_repo (repo_url=cfg.add_repo, nodes=etcd_peers)
def etcd_init(cluster_name, nodes):
    """Initialise an etcd cluster on ``nodes`` or extend an existing one.

    Steps, all driven through ``dscripts/d10.etcd.sh``:

    1. ``init`` on every node, then query the ok/bad member lists.
    2. If no member is reported at all, configure and enable a new
       cluster on the node returned by ``pick_init_node`` and wait for it
       to show up as a good member.
    3. Remove bad members that are not part of ``nodes``.
    4. Add each node that is not yet a member, one at a time, using the
       first healthy node as controller.
    5. Run ``check`` on one randomly chosen healthy node.

    Returns:
        ``"\\n" + out`` of the first check result, or None when the check
        produced no result entries.

    Raises:
        EtcdError: if the new cluster's init node does not become a good
            member.
        AssertionError: if no controller node is available for adding
            members, or if the final member state is not healthy.
    """
    patt.host_id(nodes)
    patt.check_dup_id(nodes)

    id_hosts = [n.id + '_' + n.hostname for n in nodes]
    result = patt.exec_script(nodes=nodes, src="./dscripts/d10.etcd.sh",
                              args=['init'] + [cluster_name] + id_hosts,
                              sudo=True)
    log_results(result)

    good_members = get_members(nodes, cluster_name, 'ok')
    bad_members = get_members(nodes, cluster_name, 'bad')
    initialized = bool(good_members or bad_members)
    logger.info("initialized cluster: {}".format(initialized))
    logger.info("member ok {}".format(good_members))
    logger.info("member ko {}".format(bad_members))

    init_node = pick_init_node(nodes)

    if not initialized:
        # Heartbeat interval and election timeout are not passed to the
        # script; the previously assigned values were never used.
        init_id_host = "{}_{}".format(init_node.id, init_node.hostname)
        result = patt.exec_script(
            nodes=[init_node], src="./dscripts/d10.etcd.sh",
            args=['config'] + ['new'] + [cluster_name] + [init_id_host],
            sudo=True)
        log_results(result)
        result = patt.exec_script(
            nodes=[init_node], src="./dscripts/d10.etcd.sh",
            args=['enable'] + [cluster_name] + [init_id_host],
            sudo=True)
        log_results(result)

        bad_members = get_members([init_node], cluster_name, 'bad')
        for _attempt in range(3):
            good_members = get_members([init_node], cluster_name, 'ok')
            if good_members:
                break
            time.sleep(11)  # > than dscripts/d10.etcd.sh file locks wait
        logger.info("member ok {}".format(good_members))
        logger.info("member ko {}".format(bad_members))

        if init_node.hostname not in good_members:
            result = patt.exec_script(
                nodes=[init_node], src="./dscripts/d10.etcd.sh",
                args=['disable'] + [cluster_name] + [init_id_host],
                sudo=True)
            raise EtcdError('cluster init error',
                            "error initialising new cluster {}".format(
                                cluster_name))

    # process any remaining members one by one using one of the healthy
    # nodes as a controller
    good_members = get_members([init_node], cluster_name, 'ok')
    bad_members = get_members([init_node], cluster_name, 'bad')
    ctrl = [n for n in nodes if n.hostname in good_members]
    # Copy: appending new members must not grow the controller list,
    # otherwise the "no usable controller node" check below can never fail.
    members = list(ctrl)

    known_hostnames = [l.hostname for l in nodes]
    nodes_to_remove = [n for n in bad_members if n not in known_hostnames]
    logger.info("to remove: {}".format(nodes_to_remove))
    if nodes_to_remove:
        result = patt.exec_script(
            nodes=[ctrl[0]], src="./dscripts/d10.etcd.sh",
            args=['member_remove'] + [cluster_name] + nodes_to_remove,
            sudo=True)
        log_results(result)

    good_members = get_members([init_node], cluster_name, 'ok')
    bad_members = get_members([init_node], cluster_name, 'bad')
    nodes_to_process = [n for n in nodes
                        if n.hostname not in good_members
                        and n.hostname not in bad_members]
    logger.info("to process: {}".format(
        [n.hostname for n in nodes_to_process]))

    for m in nodes_to_process:
        logger.info("process etcd member {}".format(m.hostname))
        assert ctrl, "no usable controller node"
        if m.hostname not in [n.hostname for n in members]:
            members.append(m)
        id_hosts = [n.id + '_' + n.hostname for n in members]

        # only the first control node is used to add member
        for _attempt in range(10):
            result = patt.exec_script(
                nodes=[ctrl[0]], src="./dscripts/d10.etcd.sh",
                args=['member_add'] + [cluster_name] + id_hosts,
                sudo=True)
            log_results(result)
            if not any([bool(n.error) for n in result]):
                break
            time.sleep(3.0)

        result = patt.exec_script(
            nodes=members, src="./dscripts/d10.etcd.sh",
            args=['config'] + ['existing'] + [cluster_name] + id_hosts,
            sudo=True)
        log_results(result)
        result = patt.exec_script(
            nodes=members, src="./dscripts/d10.etcd.sh",
            args=['enable'] + [cluster_name] + id_hosts,
            sudo=True)
        log_results(result)

    good_members = get_members([init_node], cluster_name, 'ok')
    bad_members = get_members([init_node], cluster_name, 'bad')
    logger.warning("member ok {}".format(good_members))
    logger.warning("member ko {}".format(bad_members))
    assert good_members
    ok_nodes = [n for n in nodes
                if n.hostname in good_members
                and n.hostname not in bad_members]
    assert ok_nodes
    if bad_members:
        # Give late joiners a moment, then re-check from the healthy nodes.
        time.sleep(3)
        bad_members = get_members(ok_nodes, cluster_name, 'bad')
    assert not bad_members

    random_node = [random.choice(ok_nodes)]
    result = patt.exec_script(nodes=random_node,
                              src="./dscripts/d10.etcd.sh",
                              args=['check'], sudo=True)
    for r in result:
        logger.warning("hostname: {}".format(r.hostname))
        # The error used to be logged after the return statement and was
        # therefore never reported; log it before returning.
        error = getattr(r, 'error', None)
        if error:
            logger.warning("error: {}".format(error))
        return "\n{}".format(r.out)
    return None