def venv_info(bin_dir):
    if bin_dir[-1] != "/":
        bin_dir += "/"
    output, error = get_output(bin_dir + "pip freeze --all")
    pkgs = parse_freeze(output)
    output, error = get_output(bin_dir + "python --version")
    pkgs["python"] = parse_python_version(output, error)
    return pkgs
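
# Illustrative sketch, not part of the original module: the dict venv_info()
# returns maps package names to versions, with the interpreter itself stored
# under the "python" key. The names and versions below are made up.
_EXAMPLE_VENV_INFO = {
    "Django": "3.2.18",
    "pip": "23.0.1",
    "setuptools": "67.6.0",
    "python": "3.8.10",
}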
def pipenv_info(directory):
    directory = os.path.abspath(directory)
    # Run pipenv using the serverscripts' interpreter.
    pipenv = sys.executable + " -m pipenv"
    output, error = get_output(pipenv + " --where", cwd=directory)
    if output.strip() != directory:
        logger.error("No pipenv found in %s", directory)
        return
    output, error = get_output(pipenv + " run pip freeze --all", cwd=directory)
    pkgs = parse_freeze(output)
    output, error = get_output(pipenv + " run python --version", cwd=directory)
    pkgs["python"] = parse_python_version(output, error)
    return pkgs
def _database_infos():
    """Return dict with info about the databases {database name: info}"""
    query = "select datname, pg_database_size(datname) from pg_database;"
    command = "sudo -u postgres psql -c '%s' --tuples-only" % query
    output, error = get_output(command)
    if error:
        logger.warning("Error output from psql command: %s", error)
    result = {}
    for line in output.split("\n"):
        if "|" not in line:
            continue
        parts = line.split("|")
        name = parts[0].strip()
        size = parts[1].strip()  # in bytes
        if name.startswith("template") or name == "postgres":
            logger.debug("Omitting database %s", name)
            continue
        size = int(size)
        database_info = copy.deepcopy(DATABASE_TEMPLATE)
        database_info["name"] = name
        database_info["size"] = size
        result[name] = database_info
        logger.info(
            "Found database %s with size %s (%s MB)", name, size, size / 1024 / 1024
        )
    return result
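
# Illustrative sketch, not part of the original module: with ``--tuples-only``
# the psql command above prints one " name | size " line per database, and the
# loop in _database_infos() turns that into the result dict. The sample values
# are made up; DATABASE_TEMPLATE is assumed to contain at least "name" and
# "size" keys.
_EXAMPLE_PSQL_TUPLES_OUTPUT = " mydatabase | 52428800\n template1  | 7864320\n"
_EXAMPLE_DATABASE_INFOS_RESULT = {
    "mydatabase": {"name": "mydatabase", "size": 52428800},  # size in bytes
}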
def python_details(container):
    """Run pip freeze in containers that are python-based

    A container is python based if it has "python" in its command.
    """
    # Identify the python interpreter inside the docker.
    split_command = container["command"].strip('"').split(" ")
    for python_exec in PYTHON_EXEC_OPTIONS:
        if python_exec in split_command:
            break
    else:
        # It is some other command (gunicorn; bin/gunicorn).
        dirname = os.path.dirname(split_command[0])
        if dirname == "":
            python_exec = "python3"  # global default
        else:
            python_exec = os.path.join(dirname, "python")
    python_in_docker = "docker exec " + container["id"] + " " + python_exec + " "

    # Identify the python version.
    command = "--version"
    logger.debug(
        "Running %s %s in container '%s'..", python_exec, command, container["names"]
    )
    output, _ = get_output(python_in_docker + command, fail_on_exit_code=False)
    if output.startswith(DOCKER_EXEC_ERROR) or output.startswith("Traceback"):
        logger.info("Did not find Python in docker %s", container["names"])
        return {}
    python_version = parse_python_version(output, "")
    logger.info(
        "Found Python %s ('%s') in container '%s'..",
        python_version,
        python_exec,
        container["names"],
    )

    # Identify the python packages (eggs).
    command = "-m pip freeze --all"
    logger.debug(
        "Running %s %s in container '%s'..", python_exec, command, container["names"]
    )
    output, _ = get_output(python_in_docker + command, fail_on_exit_code=False)
    if output.startswith(DOCKER_EXEC_ERROR):
        logger.warning("Error output from pip freeze in docker: %s", output)
    eggs = parse_freeze(output)
    eggs["python"] = python_version
    return {"eggs": eggs}
def eggs_info(directory):
    files_of_interest = ["django", "test", "python"]
    possible_egg_dirs = set()
    python_version = None
    before = copy.copy(sys.path)
    bin_dir = os.path.join(directory, "bin")
    if not os.path.exists(bin_dir):
        return
    bin_dir_contents = os.listdir(bin_dir)
    for file_ in files_of_interest:
        if file_ not in bin_dir_contents:
            continue
        if possible_egg_dirs:
            logger.debug("Omitting bin/%s, we already have our info", file_)
            continue
        logger.debug("Looking in bin/%s for eggs+versions", file_)
        new_contents = []
        with open(os.path.join(directory, "bin", file_)) as script:
            lines = script.readlines()
        for line in lines:
            # Skipping imports that may be unavailable in the current path.
            if line.strip() != "import sys":
                # When we see these lines we have moved past the sys.path:
                if ("import " in line
                        or "os.chdir" in line
                        or "__import__" in line
                        or "_interactive = True" in line):
                    break
            new_contents.append(line)
        # This is very evil, but cool! Because of the __name__ != main the
        # remainder of the script is not executed.
        exec("".join(new_contents))
        possible_egg_dirs.update(sys.path)
        # Detect the python executable from the script's shebang line.
        first_line = lines[0].strip()
        python_executable = first_line.lstrip("#!")
        output, error = get_output("%s --version" % python_executable, cwd=directory)
        try:
            total_output = output + error
            python_version = total_output.strip().split()[1]
            # ^^^ The version ("Python 2.7.10") ends up on stdout for
            # python 3 and on stderr for python 2, hence the concatenation.
        except IndexError:
            python_version = "UNKNOWN"
        logger.debug("Python version used: %s", python_version)
    # Reset sys.path.
    sys.path = before
    eggs = {}
    for dir_ in possible_egg_dirs:
        info = list(pkg_resources.find_distributions(dir_, only=True))
        if len(info) == 0:
            continue
        info = info[0]
        eggs[info.project_name] = info.version
    if "Python" in eggs:
        # This is the version we run with, it seems.
        del eggs["Python"]
    eggs["python"] = python_version
    return eggs
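
# Illustrative sketch, not part of the original module: the head of a
# buildout-generated bin/django script that eggs_info() exec's typically looks
# roughly like this (the paths are made up). Everything up to the first real
# import is the sys.path block the function is after.
_EXAMPLE_BUILDOUT_SCRIPT_HEAD = """\
#!/usr/bin/python
import sys
sys.path[0:0] = [
    '/srv/mysite/eggs/Django-1.11.29-py2.7.egg',
    '/srv/mysite/eggs/some.package-1.0-py2.7.egg',
]
"""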
def _postgres_version():
    output, error = get_output("ps ax")
    lines = output.split("\n")
    for line in lines:
        if POSTGRES_VERSION.match(line):
            match = POSTGRES_VERSION.search(line)
            return match.group("version")
    return ""
def git_info(directory):
    """Return git information (like remote repo) for the directory"""
    logger.debug("Looking in %s...", directory)
    data = {}
    dir_contents = os.listdir(directory)
    if ".git" not in dir_contents:
        logger.warning("No .git directory found in %s", directory)
        return
    output, error = get_output("git remote -v", cwd=directory)
    for line in output.split("\n"):
        if not line:
            continue
        match = GIT_URL.search(line)
        if not match:
            logger.warning("Non-recognized 'git remote -v' line: %s", line)
            continue
        data["url"] = "https://github.com/{user}/{project}".format(
            user=match.group("user"), project=match.group("project")
        )
        logger.debug("Git repo found: %s", data["url"])
    output, error = get_output("git status", cwd=directory)
    output = output.lower()
    if "master" in output:
        data["release"] = "master"
        logger.debug("It is a master checkout")
    elif "main" in output:
        data["release"] = "main"
        logger.debug("It is a 'main' checkout")
    else:
        # Use a separate variable so the 'git status' output stays available
        # for the modification/untracked checks below.
        describe_output, error = get_output("git describe", cwd=directory)
        first_line = describe_output.split("\n")[0]
        data["release"] = first_line.strip()
        logger.debug("We're on a tag or branch: %s", data["release"])
    data["has_local_modifications"] = "changes not staged" in output
    data["has_untracked_files"] = "untracked" in output
    return data
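
# Illustrative sketch, not part of the original module: the kind of dict
# git_info() returns for a checkout that is on a tag; all values are made up.
_EXAMPLE_GIT_INFO_RESULT = {
    "url": "https://github.com/example-user/example-project",
    "release": "1.2",
    "has_local_modifications": False,
    "has_untracked_files": True,
}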
def django_info_buildout(bin_django):
    matplotlibenv = "MPLCONFIGDIR=/tmp"
    # ^^^ Corner case when something needs matplotlib in django's settings.
    target_user_id = os.stat(bin_django).st_uid
    command = "sudo -u \\#%s %s %s diffsettings" % (
        target_user_id,
        matplotlibenv,
        bin_django,
    )
    logger.debug("Running %s diffsettings...", bin_django)
    output, error = get_output(command)
    if error:
        logger.warning("Error output from diffsettings command: %s", error)
    if not output:
        return
    return parse_django_info(output)
def container_details():
    """Return a list of details of running containers

    The fields are all fields that docker ps can return. See
    https://docs.docker.com/engine/reference/commandline/ps/.
    """
    command = "docker ps --no-trunc --format '{}'".format(DOCKER_PS_FORMAT)
    logger.debug("Running 'docker ps'...")
    output, error = get_output(command, fail_on_exit_code=False)
    if error:
        logger.warning("Error output from docker command: %s", error)
        return []
    keys = [x.lower() for x in DOCKER_PS_FIELDS]
    return [
        dict(zip(keys, line.split("\t"))) for line in output.split("\n") if line
    ]
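
# Illustrative sketch, not part of the original module: assuming
# DOCKER_PS_FIELDS contains fields like ["ID", "Names", "Command"], one
# tab-separated line of ``docker ps --format`` output is zipped into a dict
# roughly like this; the values are made up.
_EXAMPLE_CONTAINER_DETAIL = {
    "id": "9f86d081884c7d659a2feaa0c55ad015",
    "names": "web_1",
    "command": '"gunicorn wsgi"',
}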
def all_info():
    """Return the info we want to extract from docker.

    The output looks like this::

        $ docker system df
        TYPE            TOTAL   ACTIVE  SIZE        RECLAIMABLE
        Images          50      2       16.66 GB    16.13 GB (96%)
        Containers      2       2       70 B        0 B (0%)
        Local Volumes   3       3       123 MB      0 B (0%)

    """
    result = DOCKER_TEMPLATE.copy()
    command = "docker system df"
    logger.debug("Running '%s'...", command)
    output, error = get_output(command, fail_on_exit_code=False)
    if error:
        logger.warning("Error output from docker command: %s", error)
    lines = [line.strip() for line in output.split("\n")]
    lines = [line.lower() for line in lines if line]
    if not lines or "active" not in lines[0]:
        return {}
    start_column = lines[0].find("active")
    for line in lines[1:]:
        count = line[start_column:start_column + 4].strip()
        try:
            count = int(count)
        except ValueError:
            logger.exception("Couldn't parse int: %r", count)
            continue
        if "images" in line:
            result["active_images"] = count
        if "containers" in line:
            result["active_containers"] = count
        if "volumes" in line:
            result["active_volumes"] = count
    result["containers"] = container_details()
    for container in result["containers"]:
        container["python"] = python_details(container)
    logger.info("Found %d active docker containers", result["active_containers"])
    return result
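
# Illustrative sketch, not part of the original module: all_info() locates the
# "active" header column and reads a four-character slice of every following
# line at that position. The hard-coded sample below reproduces that slicing;
# the column widths and values are made up.
_EXAMPLE_DF_HEADER = "type".ljust(20) + "total".ljust(20) + "active".ljust(20) + "size"
_EXAMPLE_DF_LINE = "images".ljust(20) + "50".ljust(20) + "2".ljust(20) + "16.66 gb"
_EXAMPLE_START_COLUMN = _EXAMPLE_DF_HEADER.find("active")
_EXAMPLE_ACTIVE_COUNT = int(
    _EXAMPLE_DF_LINE[_EXAMPLE_START_COLUMN:_EXAMPLE_START_COLUMN + 4].strip()
)  # -> 2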
def supervisorctl_warnings(supervisorctl_command):
    """Return number of not-running processes inside supervisorctl"""
    command = "%s status" % supervisorctl_command
    logger.debug("Running '%s'...", command)
    output, error = get_output(command)
    if error:
        logger.warning("Error output from supervisorctl command: %s", error)
    lines = [line.strip() for line in output.split("\n")]
    lines = [line for line in lines if line]
    for exception in SUPERVISOR_CRONJOB_EXCEPTIONS:
        lines = [line for line in lines if exception not in line]
    not_running = [line for line in lines if "running" not in line.lower()]
    num_not_running = len(not_running)
    if num_not_running:
        logger.warning("Some processes in %s aren't running:", supervisorctl_command)
        for line in not_running:
            logger.warning("    %s", line)
    return num_not_running
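
# Illustrative sketch, not part of the original module: typical
# ``supervisorctl status`` lines that supervisorctl_warnings() inspects; the
# second one would be counted as not running. The process names are made up.
_EXAMPLE_SUPERVISORCTL_STATUS = (
    "mysite                           RUNNING   pid 1234, uptime 2 days\n"
    "mysite-worker                    FATAL     Exited too quickly\n"
)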
def django_info_pipenv(directory):
    original_dir = os.getcwd()
    os.chdir(directory)
    matplotlibenv = "MPLCONFIGDIR=/tmp"
    # ^^^ Corner case when something needs matplotlib in django's settings.
    target_user_id = os.stat("manage.py").st_uid
    django_script = "pipenv run python manage.py"
    command = "sudo -u \\#%s %s %s diffsettings" % (
        target_user_id,
        matplotlibenv,
        django_script,
    )
    output, error = get_output(command, cwd=directory, fail_on_exit_code=False)
    os.chdir(original_dir)
    if error:
        logger.warning("Error output from diffsettings command: %s", error)
    if not output:
        return
    return parse_django_info(output)
def _table_bloat(database_names):
    query = """
    -- btree index stats query
    -- estimates bloat for btree indexes
    WITH btree_index_atts AS (
        SELECT nspname,
            indexclass.relname as index_name,
            indexclass.reltuples,
            indexclass.relpages,
            indrelid,
            indexrelid,
            indexclass.relam,
            tableclass.relname as tablename,
            regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,
            indexrelid as index_oid
        FROM pg_index
        JOIN pg_class AS indexclass ON pg_index.indexrelid = indexclass.oid
        JOIN pg_class AS tableclass ON pg_index.indrelid = tableclass.oid
        JOIN pg_namespace ON pg_namespace.oid = indexclass.relnamespace
        JOIN pg_am ON indexclass.relam = pg_am.oid
        WHERE pg_am.amname = 'btree' and indexclass.relpages > 0
            AND nspname NOT IN ('pg_catalog','information_schema')
    ),
    index_item_sizes AS (
        SELECT
            ind_atts.nspname,
            ind_atts.index_name,
            ind_atts.reltuples,
            ind_atts.relpages,
            ind_atts.relam,
            indrelid AS table_oid,
            index_oid,
            current_setting('block_size')::numeric AS bs,
            8 AS maxalign,
            24 AS pagehdr,
            CASE WHEN max(coalesce(pg_stats.null_frac,0)) = 0
                THEN 2
                ELSE 6
            END AS index_tuple_hdr,
            sum( (1-coalesce(pg_stats.null_frac, 0)) * coalesce(pg_stats.avg_width, 1024) ) AS nulldatawidth
        FROM pg_attribute
        JOIN btree_index_atts AS ind_atts
            ON pg_attribute.attrelid = ind_atts.indexrelid
            AND pg_attribute.attnum = ind_atts.attnum
        JOIN pg_stats ON pg_stats.schemaname = ind_atts.nspname
            -- stats for regular index columns
            AND ( (pg_stats.tablename = ind_atts.tablename
                   AND pg_stats.attname = pg_catalog.pg_get_indexdef(pg_attribute.attrelid, pg_attribute.attnum, TRUE))
            -- stats for functional indexes
            OR (pg_stats.tablename = ind_atts.index_name
                AND pg_stats.attname = pg_attribute.attname))
        WHERE pg_attribute.attnum > 0
        GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
    ),
    index_aligned_est AS (
        SELECT maxalign, bs, nspname, index_name, reltuples,
            relpages, relam, table_oid, index_oid,
            coalesce (
                ceil (
                    reltuples * ( 6
                        + maxalign
                        - CASE
                            WHEN index_tuple_hdr%maxalign = 0 THEN maxalign
                            ELSE index_tuple_hdr%maxalign
                          END
                        + nulldatawidth
                        + maxalign
                        - CASE /* Add padding to the data to align on MAXALIGN */
                            WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
                            ELSE nulldatawidth::integer%maxalign
                          END
                    )::numeric
                    / ( bs - pagehdr::NUMERIC ) + 1
                )
            , 0
            ) as expected
        FROM index_item_sizes
    ),
    raw_bloat AS (
        SELECT current_database() as dbname, nspname,
            pg_class.relname AS table_name, index_name,
            bs*(index_aligned_est.relpages)::bigint AS totalbytes,
            expected,
            CASE
                WHEN index_aligned_est.relpages <= expected
                    THEN 0
                    ELSE bs*(index_aligned_est.relpages-expected)::bigint
            END AS wastedbytes,
            CASE
                WHEN index_aligned_est.relpages <= expected
                    THEN 0
                    ELSE bs*(index_aligned_est.relpages-expected)::bigint * 100 / (bs*(index_aligned_est.relpages)::bigint)
            END AS realbloat,
            pg_relation_size(index_aligned_est.table_oid) as table_bytes,
            stat.idx_scan as index_scans
        FROM index_aligned_est
        JOIN pg_class ON pg_class.oid=index_aligned_est.table_oid
        JOIN pg_stat_user_indexes AS stat ON index_aligned_est.index_oid = stat.indexrelid
    ),
    format_bloat AS (
        SELECT dbname as database_name,
            nspname as schema_name,
            table_name,
            round(realbloat) as bloat_pct,
            round(wastedbytes/(1024^2)::NUMERIC) as bloat_mb
        FROM raw_bloat
    )
    -- final query outputting the bloated indexes
    -- change the where and order by to change
    -- what shows up as bloated
    SELECT *
    FROM format_bloat
    WHERE ( bloat_pct > 20 and bloat_mb > 10 )
    ORDER BY bloat_mb DESC;
    """
    result = []
    for database_name in database_names:
        command = 'sudo -u postgres psql -c "%s" --tuples-only %s' % (
            query,
            database_name,
        )
        output, error = get_output(command)
        if error:
            logger.warning("Error output from psql command: %s", error)
        for line in output.split("\n"):
            # database_name | schema_name | table_name | bloat_pct | bloat_mb
            if "|" not in line:
                continue
            parts = [part.strip() for part in line.split("|")]
            name = ":".join([parts[0], parts[1], parts[2]])
            percentage = parts[3]
            mb = parts[4]
            result.append({"name": name, "percentage": percentage, "mb": mb})
            logger.info("Table %s has %s%% bloat (%sMB)", name, percentage, mb)
    return result
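
# Illustrative sketch, not part of the original module: each bloated index ends
# up in the result list as a dict like the one below; the name combines
# database, schema and table, and the numbers are made up.
_EXAMPLE_TABLE_BLOAT_ENTRY = {
    "name": "mydatabase:public:some_big_table",
    "percentage": "45",
    "mb": "120",
}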