        # Tail of __init__ (method header is outside this view).
        # AWS EC2 instances are not supported by this plugin.
        if uuid.startswith("ec2"): raise NotImplementedError( "HUT appears to be an aws instance which is currently unsupported by Power plugin")
        # /proc/cpuinfo mentions "hypervisor" only on VMs; `|| [[ $? == 1 ]]`
        # keeps a no-match grep exit code from failing the ssh command.
        assert bool(self._host.SshDirect.execute('cat /proc/cpuinfo | grep -i hypervisor || [[ $? == 1 ]]', timeout=3).strip()), \
            "Tried to power off a machine which isnt a vm"
        try:
            self.rm_ep = self._host.resource_manager_ep
        except KeyError:
            raise Exception("Please configure host's resource manager (in hardware.yaml) before using power plugin")
        # Base REST endpoint of this VM on the resource manager.
        self.url = f'http://{self.rm_ep}/vms/{self.id}'

    def off(self):
        # Power the VM off through the resource manager REST API.
        self.verify_available()
        url = f'{self.url}/status'
        res = requests.post(url, json={"power": "off"})
        assert res.status_code == 200

    def on(self):
        # Power the VM on through the resource manager REST API.
        url = f'{self.url}/status'
        res = requests.post(url, json={"power": "on"})
        assert res.status_code == 200

    def status(self):
        # Return the power-status string reported by the resource manager.
        url = f'{self.url}'
        res = requests.get(url)
        assert res.status_code == 200
        return res.json()['info']['status']


plugins.register('Power', Power)
        # Tail of an app-install method (header outside this view).
        kwargs["global.pullPolicy"] = image_pull_policy
        cmd_options = ""
        if self.app_exists(app_name) and force:
            logging.info(f"{app_name} is Already exists, Running upgrade...")
            self.upgrade_app(app_name, version, **kwargs)
        else:
            rancher_cmd = "rancher app install"
            # Each kwarg becomes a helm-style --set override.
            for k, v in kwargs.items():
                cmd_options += f" --set {k}={v}"
            cmd_options += f" --version {version} --namespace {namespace} {app_name} {app_name} --no-prompt"
            cmd = f"sudo gravity exec {rancher_cmd} {cmd_options}"
            logging.debug(cmd)
            self._host.SshDirect.execute(cmd)
        if wait:
            self.wait_for_app(app_name, timeout)

    def delete_app(self, app_name):
        # Delete a rancher app by name inside the gravity environment.
        self._host.SshDirect.execute(
            f"sudo gravity exec rancher app delete {app_name}")

    def app_exists(self, app_name):
        # Query the rancher API directly; HTTP 404 means "not installed".
        # NOTE(review): project id "p-8n6zr" is hard-coded — confirm it is
        # stable across deployments.
        res = requests.get(
            f"{self.BASE_URL}/v3/project/local:p-8n6zr/apps/p-8n6zr%3A{app_name}",
            headers=self.auth_header,
            verify=False)
        assert res.status_code == 200 or res.status_code == 404
        return res.status_code != 404


plugins.register("Rancher", Rancher)
        # Tail of a readiness-check method (header outside this view).
        try:
            nodes_status = json.loads(
                self._host.Docker.run_cmd_in_service(
                    'memsql', 'gosu memsql memsql-admin list-nodes --json'))
        except Exception as e:
            raise Exception("Failed to execute node-status command") from e
        else:
            # Every node must be running, connectable and fully recovered.
            if not all([
                    node['processState'] == 'Running' and node['isConnectable']
                    and node['recoveryState'] == 'Online'
                    for node in nodes_status['nodes']
            ]):
                raise Exception(f"memsql is not ready {nodes_status}")

    def reset_state(self):
        # Drop all data; the connection helper performs the truncation.
        self.connection.truncate_all()

    def verify_functionality(self):
        # Smoke test: listing databases proves the server answers queries.
        dbs = self.fetch_all("show databases")

    def stop_service(self):
        self._host.Docker.stop_container("memsql")

    def start_service(self):
        self._host.Docker.start_container("memsql")
        self._host.Docker.wait_container_up("memsql")
        # Container "up" does not mean the DB accepts queries yet.
        waiter.wait_nothrow(self.ping, timeout=30)


plugins.register('Memsql', Memsql)
            "/tmp/resource")
        # Tail of a self-test method (call above continues from outside this view).
        fs_content = self._host.SSH.get_contents("/tmp/resource")
        assert fs_content == test_content
        self.deploy_multiple_resources_to_proxy_container(
            files[0:2], "/tmp/resource_multiple")
        filesnum = int(
            self._host.SSH.execute(
                "ls -1 /tmp/resource_multiple | wc -l").strip())
        assert filesnum == 2
        logging.info(
            "<<<<<<<<<RESOURCE_MANAGER PLUGIN FUNCTIONING PROPERLY>>>>>>>>>>>>>>>>>>"
        )
        return True


plugins.register('ResourceManager', ResourceManager)


class ProgressPercentage(object):
    # Callable progress reporter for file upload/download.

    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # Transfer callbacks may fire from multiple threads.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        # Set this object as a callback to upload/download file to print progress to stdout.
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = (self._seen_so_far / self._size) * 100
            sys.stdout.write(
exclude_expr = " ".join([f"--exclude {exclude_dir}" for exclude_dir in exclude_dirs]) prefix = f"sshpass -p {self._connection.password} rsync -ravh --delete {exclude_expr} -e \"ssh -p {self._connection.port} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR\"" cmd = f"{prefix} {src} {self._connection._username}@{self._host.ip}:{dst}" subprocess.check_output(cmd, shell=True) def compress(self, src, dst): src = src if type(src) is list else [src] dst = dst if dst.endswith('tar.gz') else '{dst}.tar.gz' try: self.execute(f"tar --warning=no-file-changed --use-compress-program=pigz -cvf {dst} {' '.join([shlex.quote(folder) for folder in src])}") except SSHCalledProcessError as e: if e.returncode == 1: logging.info("compress should have succeeded, the return code is 1 only because 'some files differ'") logging.info(f"ls on dest dir: {self.execute(f'ls -al {os.path.dirname(dst)}')}") else: raise class SSHCalledProcessError(CalledProcessError): def __init__(self, returncode, cmd, output=None, stderr=None, host=None): super(SSHCalledProcessError, self).__init__(returncode, cmd, output, stderr) self.host = host def __str__(self): return "Command '%s' on host %s returned non-zero exit status %d\nstdout: %s\nstderr: %s" % \ (self.cmd, self.host.ip, self.returncode, self.output, self.stderr) plugins.register('SshDirect', SshDirect)
        # Tail of a method (header outside this view).
        self._ssh_direct.execute(cmd)

    def recreate_service(self, compose_file_path, service_name):
        # Purge then re-create: guarantees a fresh container for the service.
        self.purge_service(compose_file_path, service_name)
        self.create_service(compose_file_path, service_name)

    def refresh_compose(self, compose_file_path):
        # Create any missing containers without starting them or touching deps.
        cmd = f"{self.compose_bin_path} -f {compose_file_path} up --no-start --no-deps"
        self._ssh_direct.execute(cmd)

    def run_foreground(self, compose_file_path, *services):
        services_to_refresh = " ".join([service for service in services])
        cmd = f"{self.compose_bin_path} -f {compose_file_path} up --no-deps --no-recreate --no-color {services_to_refresh}"
        self._ssh_direct.execute(cmd)

    def restart_services(self, compose_file_path, *services):
        services_to_refresh = " ".join([service for service in services])
        cmd = f"{self.compose_bin_path} -f {compose_file_path} restart {services_to_refresh}"
        self._ssh_direct.execute(cmd)

    def services(self, compose_file_path):
        # List the service names declared in the compose file.
        # NOTE(review): uses bare "docker-compose" while the methods above use
        # self.compose_bin_path — confirm the inconsistency is intentional.
        cmd = f"docker-compose -f {compose_file_path} config --services"
        return self._ssh_direct.execute(cmd).strip().split('\n')

    def service_images(self, compose_file_path):
        # Container ids (ps -q) for all services, including stopped ones (-a).
        cmd = f"docker-compose -f {compose_file_path} ps -q -a"
        return self._ssh_direct.execute(cmd).strip().split('\n')


plugins.register("DockerCompose", DockerCompose)
        # Tail of a reset-state path (header outside this view).
        raise Exception("Infra doesnt know how to reset_state of kafka on K8s")

    def restart(self):
        container.restart_container_by_service(self._host, "kafka")

    def stop(self):
        container.stop_container_by_service(self._host, "kafka")

    def start(self):
        container.start_container_by_service(self._host, "kafka")

    def delete_storage_compose(self):
        # Wipe kafka's on-disk data (compose deployments only).
        self._host.SshDirect.execute('sudo rm -rf /storage/kafka/*')

    def log_kafka_rpyc_server_errors(self):
        # Dump everything useful for debugging a dead kafka rpyc server.
        logging.error("had problem with kafka rpyc server")
        logging.info(f"infra thinks that background rpyc server is running: {self._rpyc.running()}")
        logging.info(f"return code: {self._rpyc.returncode}")
        logging.info(f"output: {self._rpyc.output}")
        logging.info(f"stderr: {self._rpyc.error}")
        # `|| [[ $? == 1 ]]` keeps a no-match grep from raising on the ssh side.
        logging.info(f"docker ps | grep kafka: %s", self._host.SshDirect.execute('docker ps | grep kafka || [[ $? == 1 ]]').split('\n'))
        logging.info("python3 processes (no grep): %s", self._host.SSH.execute('ps -ef | grep python3 | grep -v grep || [[ $? == 1 ]]').split('\n'))
        logging.info("all python3 processes (just to be sure): %s", self._host.SSH.execute('ps -ef | grep python3 ').split('\n'))


plugins.register('Kafka', Kafka)
return json.loads( self._host.SshDirect.execute( "sudo gravity exec helm list -q --output json")) def repo_update(self, flags=None): flags = flags or "" self.repo(f"update {flags}") def repo(self, command): return self._host.SshDirect.execute( f"sudo gravity exec helm repo {command}") def install(self, name, chart, flags=None): flags = flags or "" return self._host.SshDirect.execute( f"sudo gravity exec helm install --name {name} {chart} {flags}") def delete(self, name, flags=None): flags = flags or "" try: return self._host.SshDirect.execute( f"sudo gravity exec helm delete name {name} {flags}") except SSHCalledProcessError as e: # We don't want to fail in case their isn't anything to delete if "not found" in e.output: pass logging.error(e.output) plugins.register("Helm", Helm)
    def get_key(self, key):
        # Returns the raw value (bytes) or None when the key is missing.
        return self._redis.get(key)

    def key_exists(self, key):
        return self._redis.exists(key)

    def delete_key(self, key):
        return self._redis.delete(key)

    def clear_and_start(self):
        # Flush all data, (re)start the container, then wait until it answers.
        self._redis.flushall()
        # NOTE(review): service name "_redis" (leading underscore) looks odd —
        # confirm it matches the actual compose service name.
        container.start_container_by_service(self._host, "_redis")
        waiter.wait_nothrow(self.ping, timeout=30)

    def ping(self):
        return self._redis.ping()

    def wait_for_redis_to_be_up(self):
        waiter.wait_for_predicate(lambda: self.ping(), timeout=30)

    def verify_functionality(self):
        # End-to-end smoke test of set/get/exists/delete.
        self.ping()
        assert self.set_key("test_key", "test_value")
        assert self.key_exists("test_key")
        assert b"test_value" in self.get_key("test_key")
        assert self.delete_key("test_key")
        assert not self.key_exists("test_key")


plugins.register('Redis', Redis)
            # Tail of a storage-reset method; this raise belongs to an
            # out-of-view conditional (no pods found for the resource).
            raise Exception(f"unable to find {name} {resource_type} pods")
        pvc_list = []
        pv_list = []
        # Collect the pvc and pv backing every pod of this resource.
        for pod in pod_list:
            pod_name = pod["metadata"]["name"]
            logging.debug(f"get pvc name from {name} pod")
            pvc_name = self.get_pvc_by_pod_name(pod_name)
            pvc_list.append(pvc_name)
            logging.debug(f"get pv name from {pvc_name} pvc")
            pv_name = self.get_pv_by_pvc_name(pvc_name)
            pv_list.append(pv_name)
        # Reclaim policy "Delete" makes the underlying volume go away with its pvc.
        for pv in pv_list:
            logging.debug(f"set reclaim policy \"Delete\" to {pv} pv")
            self.set_pv_reclaim_policy(pv, "Delete")
        logging.debug(f"scale down {resource_type}: {name}")
        self.scale(name, resource_type, replicas=0)
        self.delete_pod_by_label(label_value, label_name, "true", 0)
        wait_for_predicate(
            lambda: self.num_of_pod_replicas(name, resource_type) == 0, 120)
        for pvc in pvc_list:
            logging.debug(f"delete {pvc} pvc")
            self.delete_pvc(pvc)
        logging.debug(f"scale up {resource_type} {name}")
        self.scale(name, resource_type, replicas=num_of_pods)
        wait_for_predicate_nothrow(
            lambda: self.num_of_ready_pod_replicas(name, resource_type) == num_of_pods, 180)


plugins.register("K8s", K8s)
@property def address_on_remote(self): return f"127.0.0.1:{self._registry_port}" def _tunneled_image_name(self, image_fqdn): image_name = image_fqdn.split('/')[-1] return f"{self.local_address}/{image_name}" def _remote_image_name(self, image_fqdn): image_name = image_fqdn.split('/')[-1] return f"{self.address_on_remote}/{image_name}" def deploy(self, image_fqdn, remote_name=None): logging.info(f"Deploy {image_fqdn} to {self._host.ip}") self.start() temp_image = self._tunneled_image_name(image_fqdn) docker_utils.tag(image_fqdn, temp_image) remote_name = remote_name or image_fqdn try: docker_utils.push(temp_image) image_name_on_remote = self._remote_image_name(image_fqdn) self._host.Docker.pull(image_name_on_remote) self._host.Docker.tag(image_name_on_remote, remote_name) self._host.Docker.rmi(image_name_on_remote) finally: docker_utils.rmi(temp_image) plugins.register("DockerRegistry", DockerRegistry)
        # Tail of a master-ip accessor (header outside this view).
        res = self.status()
        return res['cluster']['nodes'][0]['advertise_ip']

    def download_gravity(self, download_request):
        # download_request is a full shell command prepared by the caller.
        self._host.SshDirect.execute(download_request)

    def gravity_make_executable(self, gravity_path):
        self._host.SshDirect.execute(f"sudo chmod +x {gravity_path}")

    def join(self, master_ip, join_token, role='node', cloud_provider='generic'):
        # Join this host to an existing gravity cluster.
        self._host.SshDirect.execute(
            f'sudo gravity join {master_ip} --token={join_token} --role={role} --cloud-provider={cloud_provider}'
        )

    def ping(self):
        # Liveness check: both queries must return truthy data.
        assert self.nodes()
        assert self.status()

    def test_functionality(self):
        self.ping()
        assert self.master_ip
        assert self.token
        logging.info("<<<<<<<GRAVITY PLUGIN FUNCTIONING PROPERLY>>>>>>>>>>>>.")


plugins.register('Gravity', Gravity)
from infra.model.host import Host
from infra.model import plugins
from automation_infra.plugins import connection
from automation_infra.plugins.ssh_direct import SshDirect


class SSH(SshDirect):
    """SshDirect variant that connects through the proxy container's sshd."""

    def connect(self, port=2222, timeout=10, user="******", password="******"):
        # TODO: have handle security here
        proxy_host = Host.from_args(self._host.ip, user, password, port=port,
                                    alias=self._host.alias)
        self._connection = connection.Connection(proxy_host)
        self._connection.connect(timeout)

    @property
    def _using_keyfile(self):
        # Proxy access is always password-based, never key-based.
        return False


plugins.register("SSH", SSH)
                 source_port=None):
        # Tail of drop(...) — the signature starts outside this view.
        protocol_cmd = self.protocol_cmd(protocol)
        iptables_filter = self._filter(source_service, source_port, service_name, service_port)
        # -w waits for the xtables lock; rule is inserted at the top of the
        # automation chain so the DROP takes effect.
        cmd = f"sudo iptables -w --insert {self.AUTOMATION_CHAIN} {protocol_cmd} {iptables_filter} -j DROP"
        self._ssh.execute(cmd)

    def undrop(self, service_name, protocol=None, service_port=None, source_service=None, source_port=None):
        # Remove a DROP rule previously added by the matching drop() call;
        # the filter must be rebuilt identically for --delete to match.
        protocol_cmd = self.protocol_cmd(protocol)
        iptables_filter = self._filter(source_service, source_port, service_name, service_port)
        self._ssh.execute(
            f"sudo iptables -w --delete {self.AUTOMATION_CHAIN} {protocol_cmd} {iptables_filter} -j DROP"
        )

    @staticmethod
    def protocol_cmd(protocol):
        # Empty string when no protocol filter is requested.
        return f"-p {protocol}" if protocol else ""

    def reset_state(self):
        # Recreate a clean automation chain.
        self.flush_or_create()
        self.activate_automation_chain()


plugins.register('Iptables', Iptables)
            'mongodb', self.DNS_NAME, self.PORT)
        # Tail of a tunnel-building method (call above continues from outside
        # this view).
        return tunnel

    def _get_client(self, credentials=None):
        # Build a MongoClient through the local tunnel endpoint; credentials,
        # when given, is a dict with 'username' and 'password' keys.
        uri = f"mongodb://{self.tunnel.local_endpoint}" if not credentials else \
            f"mongodb://{credentials['username']}:{credentials['password']}@{self.tunnel.local_endpoint}"
        client = MongoClient(uri)
        return client

    @staticmethod
    def ping(mongodb_conn):
        # Listing databases proves both connectivity and permissions.
        try:
            mongodb_dbs_list = mongodb_conn.list_database_names()
            assert len(mongodb_dbs_list) > 0
        except Exception:
            raise Exception("failed to connect to mongodb")

    def verify_functionality(self):
        mongodb_conn = self.client
        self.ping(mongodb_conn=mongodb_conn)


plugins.register('Mongodb', Mongodb)


@hardware_config(hardware={"host": {}})
def test_basic(base_config):
    host = next(iter(base_config.hosts.values()))
    mongodb = host.Mongodb
    mongodb.verify_functionality()
        # Tail of a teardown method (header outside this view).
        self.SSH.disconnect()
        if "SshDirect" in self.__plugins:
            self.SshDirect.disconnect()
        self.__plugins.clear()

    def unique(self):
        # Monotonic per-host counter, e.g. for unique temp-dir names.
        return next(self._temp_dir_counter)

    def __str__(self):
        return self.ip

    @classmethod
    def from_args(cls, ip, user, password=None, key_file_path=None,
                  pem_key_string=None, **kwargs):
        # Alternate constructor from loose arguments instead of a config dict;
        # extra kwargs are passed straight through to __init__.
        basic = {"ip": ip,
                 "user": user,
                 "password": password,
                 "key_file_path": key_file_path,
                 "pem_key_string": pem_key_string
                 }
        basic.update(**kwargs)
        return cls(**basic)


plugins.register('Host', Host)


def test_functionality():
    # Construction smoke test only — no connections are made.
    host1 = Host(**host_config_example1)
    host2 = Host(**host_config_example2)
    host3 = Host(**host_config_example3)
    host4 = Host.from_args('0.0.0.0', 'user', 'pass')
    host5 = Host.from_args('0.0.0.0', 'user', key_file_path='/path/to/pem')
        # Tail of a log-truncation method (header outside this view).
        return self._ssh_direct.execute(f'truncate -s 0 {logpath}')

    def pull(self, image_fqdn):
        cmd = f"{self._docker_bin} pull {image_fqdn}"
        return self._ssh_direct.execute(cmd)

    def tag(self, image_name, new_image_name):
        cmd = f"{self._docker_bin} tag {image_name} {new_image_name}"
        return self._ssh_direct.execute(cmd)

    def rmi(self, image_name):
        cmd = f"{self._docker_bin} rmi {image_name}"
        return self._ssh_direct.execute(cmd)

    def image_ids(self, image_regexp):
        # Substring match on the image reference; one image id per line.
        cmd = f"{self._docker_bin} images -q --filter=reference='*{image_regexp}*'"
        return self._ssh_direct.execute(cmd).strip().split('\n')

    def labels(self, container_id):
        return self.inspect(container_id)['Config']['Labels']

    def image_fqdn(self, container_id):
        return self.inspect(container_id)['Config']['Image']

    def change_restart_policy(self, container_id, policy):
        # policy: a docker restart policy, e.g. "no", "always", "on-failure".
        cmd = f"{self._docker_bin} update {container_id} --restart={policy}"
        self._ssh_direct.execute(cmd)


plugins.register("Docker", Docker)
def delete_queue(self, queue): url = f"http://{self.admin_tunnel.local_endpoint}/api/queues/{self.virtual_host if self.virtual_host != '/' else '%2F'}/{queue}" response = requests.delete(url, auth=(self.user, self.password)) response.raise_for_status() logging.debug(f'Done remove queue {queue}') def reset_state(self): for queue in self.get_queue_list(): self.delete_queue(queue) logging.debug('Done remove all queues') def get_vhost_node(self): url = f"http://{self.admin_tunnel.local_endpoint}/api/queues/{self.virtual_host if self.virtual_host != '/' else '%2F'}" response = requests.get(url, auth=(self.user, self.password)) response.raise_for_status() return response.json()[0]['node'] plugins.register('Rabbitmq', Rabbitmq) @hardware_config(hardware={"host": {}}) def test_basic(base_config): host = next(iter(base_config.hosts.values())) rmq = host.Rabbitmq rmq_connection = rmq.create_amqp_connection() rmq_connection.ping() rmq_connection.verify_functionality() rmq_connection.close()
            else prometheus_connection_config['port']['k8s']
        # Tail of __init__ (the conditional above continues from outside this
        # view); chooses compose vs k8s connection parameters.
        self.start_tunnel(self.DNS_NAME, self.PORT)
        self.url = f'{prometheus_connection_config["url"]["compose"]}:{self.local_bind_port}' if not self.is_k8s \
            else f'{prometheus_connection_config["url"]["k8s"]}'
        if self.is_k8s:
            # On k8s prometheus is reached by DNS name; make sure /etc/hosts
            # resolves it to the host under test.
            with open('/etc/hosts', 'r+') as f:
                content = f.read()
                if f"{host.ip} {self.DNS_NAME}" not in content:
                    logging.info(
                        f"write new line in hosts file: {host.ip} {self.DNS_NAME}"
                    )
                    f.write(f'\n{host.ip} {self.DNS_NAME}\n')
        self.headers = None if not self.is_k8s \
            else {'Authorization': f'Basic {prometheus_connection_config["auth"]}'}
        self._prom = Prometheus(url=self.url, headers=self.headers)

    def query(self, query):
        # Return the parsed JSON result of a PromQL query.
        return json.loads(self._prom.query(metric=query))

    def ping(self):
        # Any successful query proves the connection works.
        self.query(query='prometheus_engine_queries')


plugins.register('PrometheusService', PrometheusService)


@hardware_config(hardware={"host": {}})
def test_basic(base_config):
    prom = base_config.hosts.host.PrometheusService
    prom.ping()
def __init__(self, host): self._host = host def flush_journal(self): self._host.SshDirect.execute("sudo journalctl --vacuum-time=1s") def log_to_journal(self, msg): cmd = f"echo '{msg}' | systemd-cat -t TESTING -p info" self._host.SshDirect.execute(cmd) def set_timezone(self, timezone): cmd = f"sudo timedatectl set-timezone {timezone}" self._host.SshDirect.execute(cmd) def machine_id(self): return self._host.SshDirect.execute('sudo cat /sys/class/dmi/id/product_uuid').strip() def exists(self, path): try: self._host.SshDirect.execute(f'ls {path}') return True except: return False def rm(self, path): return self._host.SshDirect.execute(f"rm {path} -rf") plugins.register('Admin', Admin)
        # Tail of an upload method (header outside this view).
        logging.debug(f"upload result to {bucket}/{s3_path} is {result}")
        return f'{bucket}/{s3_path}'

    def download_resource_from_s3(self, bucket, s3_path, local_folder):
        self.download_to_filesystem(s3_path, local_folder, bucket)

    def delete_resource_from_s3(self, bucket, s3_path):
        self.delete_file(bucket, s3_path)

    def deploy_multiple_resources_to_s3(self, aws_file_list, aws_folder,
                                        s3_folder):
        # Upload each file, preserving its name under s3_folder; returns the
        # list of resulting s3 paths.
        resources_s3_list = []
        for resource in aws_file_list:
            resources_s3_list.append(
                self.deploy_resource_to_s3(os.path.join(aws_folder, resource),
                                           os.path.join(s3_folder, resource)))
        return resources_s3_list

    def stop_service(self):
        self._host.Docker.stop_container("seaweedfs")
        self._host.Docker.wait_container_down("seaweedfs")

    def start_service(self):
        self._host.Docker.start_container("seaweedfs")
        self._host.Docker.wait_container_up("seaweedfs")
        # Wait for the API to answer, not just for the container to be up.
        waiter.wait_nothrow(self.ping, timeout=30)

    def service_running(self):
        return self._host.Docker.is_container_up("seaweedfs")


plugins.register('Seaweed', Seaweed)
from automation_infra.plugins.ssh_direct import SshDirect
from automation_infra.utils.waiter import wait_for_predicate
from devops_automation_infra.utils.health_check import host_is_active
from infra.model import plugins


class Power(object):
    """Host power control via a remote reboot command."""

    def __init__(self, host):
        self._host = host

    def reboot(self, options=""):
        # Reboots the host and verifies using a ping
        target = self._host
        # Fire-and-forget: the ssh session dies with the reboot, so the
        # command is backgrounded and given a tiny timeout.
        reboot_cmd = f"sudo /sbin/reboot {options} > /dev/null 2>&1 &"
        target.SshDirect.execute(reboot_cmd, timeout=0.1)
        # The host going quiet is the signal that the reboot has started.
        wait_for_predicate(lambda: not host_is_active(target.ip), timeout=20)


plugins.register("Power", Power)
            )
            # Tail of run() (header outside this view) — this code sits inside
            # an `except ... as e` handler that retries and inspects stderr.
            ssh_direct.execute(run_cmd)
            if f"manifest for gcr.io/anyvision-training/automation-proxy:{self._automation_proxy_version()} not found" in e.stderr:
                logging.error(
                    f"tag {self._automation_proxy_version()} was not pushed to gcr, "
                    f"please run make push-automation-proxy from devops-infra repo"
                )
                raise e
            else:
                raise e
        logging.debug("docker is running")

    def kill(self):
        # Kill the automation_proxy container and wait until it is gone;
        # a no-op when it is not running.
        if not self.running:
            logging.debug("nothing to remove")
            return
        logging.debug("trying to remove docker container")
        self._ssh_direct.execute(
            f"{self._docker_bin_path} kill automation_proxy")
        waiter.wait_for_predicate(lambda: not self.running)
        logging.debug("removed successfully!")

    def restart(self):
        self.run()

    def clear(self):
        self.kill()


plugins.register("ProxyContainer", ProxyContainer)
    def fetch_count(self, query):
        # Run a count query and return the scalar 'count' column; the cursor
        # is closed even if execution raises.
        with closing(
                self.connection.cursor(
                    cursor_factory=psycopg2.extras.DictCursor)) as cursor:
            cursor.execute(query)
            res = cursor.fetchone()
            return res['count']

    def ping(self):
        # Any successful query proves the connection is alive.
        dbs = self.fetch_all("select datname as db from pg_database")

    def reset_state(self):
        dbs = self.fetch_all("select datname as db from pg_database")
        # TODO: what needs to be truncated here exactly? I see the following dbs:
        #postgres || anv_db || template1 || template0 || kong

    def verify_functionality(self):
        # TODO: check flow logic here.
        dbs = self.fetch_all("select datname as db from pg_database")
        logging.info(
            "<<<<<<<POSTGRES PLUGIN FUNCTIONING PROPERLY>>>>>>>>>>>>>")


plugins.register('Postgresql', Postgresql)


@hardware_config(hardware={"host": {}})
def test_basic(base_config):
    pg = base_config.hosts.host.Postgresql
    pg.verify_functionality()
    def verify_functionality(self):
        # End-to-end smoke test of the consul KV and health APIs.
        self.put_key('test_key', 'test_value')
        self.get_key('test_key')
        self.delete_key('test_key')
        first_service = next(iter(self.get_services()))
        self._consul.health.service(first_service)[1]
        # get_value round-trips typed values (int/float/json-encoded dict).
        self.put_key('test_key_int', 2)
        self.put_key('test_key_float', 2.5)
        dict_value = {"a": 1, "b": 2.4, "c": "bla"}
        self.put_key('test_key_json', json.dumps(dict_value))
        int_value = self.get_value("test_key_int")
        assert int_value == 2, "int value not in consul"
        float_value = self.get_value("test_key_float")
        assert float_value == 2.5, "float value not in consul"
        json_value = self.get_value("test_key_json")
        assert json_value == dict_value, "dict value not in consul"

    def get_key_layered(self, service_name, key):
        # Resolve `key` for a service through the layered config hierarchy,
        # consulted in priority order: override, application, default.
        # Returns the first value found, or None when absent everywhere.
        layers_read_order = [
            self.OVERRIDE_KEY, self.APPLICATION_KEY, self.DEFAULT_KEY
        ]
        for layer in layers_read_order:
            layered_key = f"{layer}/{service_name}/{key}"
            value = self.get_key_if_exists(layered_key)
            if value is not None:
                return value
        return None


plugins.register('Consul', Consul)