def test_access_token_set(self):
    """
    Once an access token has been set, API accessors such as ``agents()``
    can be called and yield a truthy object.
    """
    client = Buildkite()
    client.set_access_token("FAKE-ACCESS-TOKEN")

    agents_api = client.agents()
    assert agents_api
# - Get a Buildkite API token with read_agents and write_agents permissions # and put it into the BUILDKITE_TOKEN environment variable. # - Optional: Modify "FILTER" below to restart different groups of agents. # For example, you can put "windows" there to restart Windows agents. import os from pybuildkite.buildkite import Buildkite from requests.exceptions import HTTPError FILTER = "docker" ORGANIZATION = "bazel" print("Using Buildkite API Token: {}".format(os.environ["BUILDKITE_TOKEN"])) buildkite = Buildkite() buildkite.set_access_token(os.environ["BUILDKITE_TOKEN"]) response = buildkite.agents().list_all(organization=ORGANIZATION, page=1, with_pagination=True) agents = response.body # Keep looping until next_page is not populated while response.next_page: response = buildkite.agents().list_all(organization=ORGANIZATION, page=response.next_page, with_pagination=True) agents += response.body for agent in agents: if FILTER in agent["name"]:
',') if ignore_job_states else [] def check_var(var: str, name: str, label: str) -> None: if var is None: raise RuntimeError( '{} must be provided via action input or environment variable {}' .format(label, name)) event = get_var('GITHUB_EVENT_PATH') event_name = get_var('GITHUB_EVENT_NAME') check_var(event, 'GITHUB_EVENT_PATH', 'GitHub event file path') check_var(event_name, 'GITHUB_EVENT_NAME', 'GitHub event name') with open(event, 'r') as f: event = json.load(f) commit = get_var('COMMIT') or get_commit_sha(event, event_name) output_path = get_var('OUTPUT_PATH') or '.' check_var(github_token, 'GITHUB_TOKEN', 'GitHub token') check_var(github_repo, 'GITHUB_REPOSITORY', 'GitHub repository') check_var(buildkite_token, 'BUILDKITE_TOKEN', 'BuildKite token') check_var(commit, 'COMMIT', 'Commit') buildkite = Buildkite() buildkite.set_access_token(buildkite_token) if not main(github_api_url, github_token, github_repo, buildkite, buildkite_url, ignore_build_states, ignore_job_states, commit, output_path): sys.exit(1)
class ReproSession:
    """Reproduce a Buildkite job on an AWS EC2 instance.

    The session parses a Buildkite job URL, downloads that job's environment
    through the Buildkite API, starts (or re-uses) an EC2 instance tagged with
    ``repro_name``, connects to it over SSH, installs the Buildkite plugins
    referenced by the job and finally runs the job's commands inside the
    ``ray_container`` docker container.
    """

    # Environment values that always override a plugin's own options, keyed
    # by the plugin's short name (see ``get_plugin_env``).
    plugin_default_env = {
        "docker": {
            "BUILDKITE_PLUGIN_DOCKER_MOUNT_BUILDKITE_AGENT": False
        }
    }

    def __init__(
            self,
            buildkite_token: str,
            instance_name: Optional[str] = None,
            logger: Optional[logging.Logger] = None,
    ):
        """Create the API clients and reset all session state.

        Args:
            buildkite_token: Access token handed to the Buildkite client.
            instance_name: Value of the ``repro_name`` tag of the instance to
                use. When omitted, ``aws_start_instance`` derives one.
            logger: Logger to use; defaults to one named after the class.
        """
        self.logger = logger or logging.getLogger(self.__class__.__name__)

        self.bk = Buildkite()
        self.bk.set_access_token(buildkite_token)

        self.ssh_user = "******"
        self.ssh_key_name = "buildkite-repro-env"
        self.ssh_key_file = "~/.ssh/buildkite-repro-env.pem"

        self.ec2_client = boto3.client("ec2", region_name="us-west-2")
        self.ec2_resource = boto3.resource("ec2", region_name="us-west-2")

        # Populated by ``set_session``.
        self.org = None
        self.pipeline = None
        self.build_id = None
        self.job_id = None

        # Populated by ``fetch_env_variables``.
        self.env: Dict[str, str] = {}

        self.aws_instance_name = instance_name
        self.aws_instance_id = None
        self.aws_instance_ip = None

        self.ssh = None

        # Populated by ``fetch_buildkite_plugins``, keyed by short name.
        self.plugins = {}

        # Commands filtered out by the last ``run_buildkite_command`` call.
        self.skipped_commands = []

    def set_session(self, session_url: str):
        """Parse a Buildkite job URL into org, pipeline, build and job IDs.

        Args:
            session_url: URL of the form
                ``https://buildkite.com/<org>/<pipeline>/builds/<n>#<job>``.

        Raises:
            ValueError: If ``session_url`` does not have that form.
        """
        # E.g.:
        # https://buildkite.com/ray-project/ray-builders-pr/
        # builds/19635#55a0d71a-831e-4f68-b668-2b10c6f65ee6
        pattern = re.compile(
            "https://buildkite.com/([^/]+)/([^/]+)/builds/([0-9]+)#(.+)")
        match = pattern.match(session_url)
        if match is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                f"Not a valid Buildkite job URL: {session_url!r}. Expected "
                f"https://buildkite.com/<org>/<pipeline>/builds/<number>"
                f"#<job-id>")
        org, pipeline, build_id, job_id = match.groups()

        self.logger.debug(f"Parsed session URL: {session_url}. "
                          f"Got org='{org}', pipeline='{pipeline}', "
                          f"build_id='{build_id}', job_id='{job_id}'.")

        self.org = org
        self.pipeline = pipeline
        self.build_id = build_id
        self.job_id = job_id

    def fetch_env_variables(self,
                            overwrite: Optional[Dict[str, Any]] = None):
        """Load the job's environment from Buildkite into ``self.env``.

        Args:
            overwrite: Optional values merged on top of the fetched ones.

        Returns:
            The resulting environment dictionary (also kept in ``self.env``).
        """
        assert self.bk

        self.env = self.bk.jobs().get_job_environment_variables(
            self.org, self.pipeline, self.build_id, self.job_id)["env"]

        if overwrite:
            self.env.update(overwrite)

        return self.env

    def aws_start_instance(self):
        """Re-use a running instance tagged with our name, or create one.

        Instance type and AMI are read from the job environment. When an
        existing running instance is found its ID and public IP are stored;
        for a newly created instance only the ID is known at this point
        (``aws_wait_for_instance`` fills in the IP).
        """
        assert self.env

        if not self.aws_instance_name:
            self.aws_instance_name = (
                f"repro_ci_{self.build_id}_{self.job_id[:8]}")
            self.logger.info(
                f"No instance name provided, using {self.aws_instance_name}")

        instance_type = self.env["BUILDKITE_AGENT_META_DATA_AWS_INSTANCE_TYPE"]
        instance_ami = self.env["BUILDKITE_AGENT_META_DATA_AWS_AMI_ID"]
        instance_sg = "sg-0ccfca2ef191c04ae"
        instance_block_device_mappings = [{
            "DeviceName": "/dev/xvda",
            "Ebs": {
                "VolumeSize": 500
            }
        }]

        # Check if instance exists:
        running_instances = self.ec2_resource.instances.filter(Filters=[
            {
                "Name": "tag:repro_name",
                "Values": [self.aws_instance_name]
            },
            {
                "Name": "instance-state-name",
                "Values": ["running"]
            },
        ])

        self.logger.info(
            f"Check if instance with name {self.aws_instance_name} "
            f"already exists...")

        # The first match wins; there is nothing else to do in that case.
        for instance in running_instances:
            self.aws_instance_id = instance.id
            self.aws_instance_ip = instance.public_ip_address
            self.logger.info(f"Found running instance {self.aws_instance_id}.")
            return

        self.logger.info(
            f"Instance with name {self.aws_instance_name} not found, "
            f"creating...")

        # Else, not running, yet, start.
        instance = self.ec2_resource.create_instances(
            BlockDeviceMappings=instance_block_device_mappings,
            ImageId=instance_ami,
            InstanceType=instance_type,
            KeyName=self.ssh_key_name,
            SecurityGroupIds=[instance_sg],
            TagSpecifications=[{
                "ResourceType": "instance",
                "Tags": [{
                    "Key": "repro_name",
                    "Value": self.aws_instance_name
                }],
            }],
            MinCount=1,
            MaxCount=1,
        )[0]

        self.aws_instance_id = instance.id
        self.logger.info(
            f"Created new instance with ID {self.aws_instance_id}")

    def aws_wait_for_instance(self):
        """Poll EC2 every two seconds until the instance is ``running``.

        Stores the instance's public IP address once it is up.
        """
        assert self.aws_instance_id

        self.logger.info("Waiting for instance to come up...")

        repro_instance_state = None
        while repro_instance_state != "running":
            detail = self.ec2_client.describe_instances(
                InstanceIds=[self.aws_instance_id], )
            repro_instance_state = detail["Reservations"][0]["Instances"][0][
                "State"]["Name"]

            if repro_instance_state != "running":
                time.sleep(2)

        self.aws_instance_ip = detail["Reservations"][0]["Instances"][0][
            "PublicIpAddress"]

    def aws_stop_instance(self):
        """Terminate the instance used by this session."""
        assert self.aws_instance_id

        self.ec2_client.terminate_instances(
            InstanceIds=[self.aws_instance_id], )

    def print_stop_command(self):
        """Print the AWS CLI command that terminates the instance."""
        click.secho("To stop this instance in the future, run this: ")
        click.secho(
            f"aws ec2 terminate-instances "
            f"--instance-ids={self.aws_instance_id}",
            bold=True,
        )

    def create_new_ssh_client(self):
        """Open a fresh SSH connection to the instance.

        Any previous client is closed first. Connection attempts are retried
        for up to 60 seconds while the host refuses connections.

        Returns:
            The connected ``paramiko`` SSH client (also kept in ``self.ssh``).

        Raises:
            TimeoutError: If no connection could be established in time.
        """
        assert self.aws_instance_ip

        if self.ssh:
            self.ssh.close()

        self.logger.info(
            "Creating SSH client and waiting for SSH to become available...")

        ssh = paramiko.client.SSHClient()
        ssh.load_system_host_keys()
        ssh.set_missing_host_key_policy(paramiko.WarningPolicy())

        deadline = time.monotonic() + 60
        while time.monotonic() < deadline:
            try:
                ssh.connect(
                    self.aws_instance_ip,
                    username=self.ssh_user,
                    key_filename=os.path.expanduser(self.ssh_key_file),
                )
                break
            except paramiko.ssh_exception.NoValidConnectionsError:
                self.logger.info("SSH not ready, yet, sleeping 5 seconds")
                time.sleep(5)
        else:
            # Never connected: do not hand out a client that cannot run
            # commands, later failures would be much harder to understand.
            raise TimeoutError(
                f"Could not establish an SSH connection to "
                f"{self.aws_instance_ip} within 60 seconds.")

        self.ssh = ssh
        return self.ssh

    def close_ssh(self):
        """Close the SSH connection, if one was opened."""
        if self.ssh:
            self.ssh.close()

    def ssh_exec(self, command, quiet: bool = False, *args, **kwargs):
        """Run ``command`` over SSH and return what it wrote to stdout.

        The command runs in a worker thread (always with a pty) so that a
        "Still executing..." message can be logged every 30 seconds.

        Args:
            command: Shell command line to execute remotely.
            quiet: When true, remote output is not echoed locally.
            *args: Accepted for compatibility; ignored.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            The captured stdout text, or ``""`` if the worker did not finish
            normally.
        """
        result = {}

        def _run():
            stdin, stdout, stderr = self.ssh.exec_command(
                command, get_pty=True)

            captured = []
            for line in stdout.readlines():
                captured.append(line)
                if not quiet:
                    print(line, end="")

            # stderr is only echoed, never part of the returned output.
            for line in stderr.readlines():
                if not quiet:
                    print(line, end="")

            result["output"] = "".join(captured)

        thread = threading.Thread(target=_run)
        thread.start()

        status = time.monotonic() + 30

        while thread.is_alive():
            thread.join(2)
            if time.monotonic() >= status and thread.is_alive():
                self.logger.info("Still executing...")
                status = time.monotonic() + 30

        thread.join()
        return result.get("output", "")

    def execute_ssh_command(
            self,
            command: str,
            env: Optional[Dict[str, str]] = None,
            as_script: bool = False,
            quiet: bool = False,
            command_wrapper: Optional[Callable[[str], str]] = None,
    ) -> str:
        """Run a command on the instance with the job environment applied.

        Args:
            command: Command line to run.
            env: Extra variables; they take precedence over ``self.env``.
            as_script: When true, ``command`` is uploaded to
                ``/tmp/script.sh`` (preceded by assignments for the entries
                of ``env`` only) and run with ``bash``. Otherwise the merged
                environment is exported in front of the command.
            quiet: When true, remote output is not echoed locally.
            command_wrapper: Optional callable that rewrites the final
                command line before it is executed.

        Returns:
            The stdout text of the executed command.
        """
        assert self.ssh

        full_env = self.env.copy()
        if env:
            full_env.update(env)

        if as_script:
            ftp = self.ssh.open_sftp()
            try:
                script = ftp.file("/tmp/script.sh", "w", -1)
                try:
                    script.write("#!/bin/bash\n")
                    # ``env`` is optional, so guard against None here.
                    for k, v in (env or {}).items():
                        script.write(f"{k}={script_str(v)}\n")
                    script.write(command + "\n")
                    script.flush()
                finally:
                    script.close()
            finally:
                ftp.close()

            full_command = "bash /tmp/script.sh"
        else:
            full_command = f"export {env_str(full_env)}; {command}"

        if command_wrapper:
            full_command = command_wrapper(full_command)

        self.logger.debug(f"Executing command: {command}")

        output = self.ssh_exec(full_command, quiet=quiet, get_pty=True)

        return output

    def execute_ssh_commands(
            self,
            commands: List[str],
            env: Optional[Dict[str, str]] = None,
            quiet: bool = False,
    ):
        """Run several commands one after another on the instance."""
        for command in commands:
            self.execute_ssh_command(command, env=env, quiet=quiet)

    def execute_docker_command(self,
                               command: str,
                               env: Optional[Dict[str, str]] = None,
                               quiet: bool = False):
        """Run ``command`` inside the ``ray_container`` docker container."""

        def command_wrapper(s):
            # Close the single-quoted string, emit a double-quoted single
            # quote, and re-open it, so embedded quotes survive the shell.
            escaped = s.replace("'", "'\"'\"'")
            return f"docker exec -it ray_container /bin/bash -ci '{escaped}'"

        self.execute_ssh_command(
            command, env=env, quiet=quiet, command_wrapper=command_wrapper)

    def prepare_instance(self):
        """Connect to the instance and make sure docker is usable.

        If ``docker ps`` already works nothing is installed. Otherwise docker
        is installed and started, the SSH user is added to the docker group,
        the SSH connection is re-created and a docker login is performed.
        """
        self.create_new_ssh_client()
        output = self.execute_ssh_command("docker ps", quiet=True)
        if "CONTAINER ID" in output:
            self.logger.info("Instance already prepared.")
            return

        self.logger.info("Preparing instance (installing docker etc.)")
        commands = [
            "sudo yum install -y docker",
            "sudo service docker start",
            f"sudo usermod -aG docker {self.ssh_user}",
        ]
        self.execute_ssh_commands(commands, quiet=True)
        # Reconnect before using docker again.
        # NOTE(review): presumably so the new group membership takes effect.
        self.create_new_ssh_client()
        self.execute_ssh_command("docker ps", quiet=True)
        self.docker_login()

    def docker_login(self):
        """Log the instance's docker client into ECR."""
        self.logger.info("Logging into docker...")
        credentials = boto3.client(
            "ecr", region_name="us-west-2").get_authorization_token()
        token = (base64.b64decode(
            credentials["authorizationData"][0]["authorizationToken"]).decode(
                "utf-8").replace("AWS:", ""))
        endpoint = credentials["authorizationData"][0]["proxyEndpoint"]
        # NOTE(review): the token is passed on the remote command line;
        # consider `docker login --password-stdin`.
        self.execute_ssh_command(
            f"docker login -u AWS -p {token} {endpoint}", quiet=True)

    def fetch_buildkite_plugins(self):
        """Parse ``BUILDKITE_PLUGINS`` from the job env into ``self.plugins``.

        Each plugin is keyed by its short name, i.e. the last URL component
        without a ``.git`` suffix and without ``-buildkite-plugin``.
        """
        assert self.env

        self.logger.info("Fetching Buildkite plugins")

        plugins = json.loads(self.env["BUILDKITE_PLUGINS"])
        for collection in plugins:
            for plugin, options in collection.items():
                plugin_url, plugin_version = plugin.split("#")
                # Only add a scheme when there is none yet.
                if not plugin_url.startswith(("http://", "https://")):
                    plugin_url = f"https://{plugin_url}"

                plugin_name = plugin_url.split("/")[-1]
                # Remove the literal suffix; ``rstrip(".git")`` would strip
                # any trailing '.', 'g', 'i' and 't' characters instead.
                if plugin_name.endswith(".git"):
                    plugin_name = plugin_name[:-len(".git")]
                plugin_short = plugin_name.replace("-buildkite-plugin", "")
                plugin_dir = f"~/{plugin_name}"
                plugin_env = self.get_plugin_env(plugin_short, options)

                self.plugins[plugin_short] = {
                    "name": plugin_name,
                    "options": options,
                    "short": plugin_short,
                    "url": plugin_url,
                    "version": plugin_version,
                    "dir": plugin_dir,
                    "env": plugin_env,
                    "details": {},
                }

    def get_plugin_env(self, plugin_short: str, options: Dict[str, Any]):
        """Translate plugin options into ``BUILDKITE_PLUGIN_*`` variables.

        Plugin and option names are upper-cased with dashes replaced by
        underscores. Entries from ``plugin_default_env`` for this plugin
        override the translated options.

        Args:
            plugin_short: Short plugin name, e.g. ``docker``.
            options: Plugin options; ``None`` is treated as no options.

        Returns:
            Mapping of environment variable name to option value.
        """
        prefix = "BUILDKITE_PLUGIN_" + plugin_short.replace("-", "_").upper()
        plugin_env = {
            f"{prefix}_{option.replace('-', '_').upper()}": value
            for option, value in (options or {}).items()
        }
        plugin_env.update(self.plugin_default_env.get(plugin_short, {}))
        return plugin_env

    def install_buildkite_plugin(self, plugin: str):
        """Clone a fetched plugin onto the instance unless already present."""
        assert plugin in self.plugins

        self.logger.info(f"Installing Buildkite plugin: {plugin}")

        plugin_dir = self.plugins[plugin]["dir"]
        plugin_url = self.plugins[plugin]["url"]
        plugin_version = self.plugins[plugin]["version"]

        self.execute_ssh_command(
            f"[ ! -e {plugin_dir} ] && git clone --depth 1 "
            f"--branch {plugin_version} {plugin_url} {plugin_dir}",
            quiet=True,
        )

    def load_plugin_details(self, plugin: str):
        """Read and parse the plugin's ``plugin.yml`` from the instance.

        Returns:
            The parsed YAML (also stored under the plugin's ``details``).
        """
        assert plugin in self.plugins

        plugin_dir = self.plugins[plugin]["dir"]

        yaml_str = self.execute_ssh_command(
            f"cat {plugin_dir}/plugin.yml", quiet=True)

        details = yaml.safe_load(yaml_str)
        self.plugins[plugin]["details"] = details
        return details

    def execute_plugin_hook(
            self,
            plugin: str,
            hook: str,
            env: Optional[Dict[str, Any]] = None,
            script_command: Optional[str] = None,
    ):
        """Run one hook script of an installed plugin on the instance.

        Args:
            plugin: Short name of a plugin in ``self.plugins``.
            hook: File name of the hook inside the plugin's ``hooks`` dir.
            env: Extra variables merged over the plugin's environment.
            script_command: Command the hook script is piped into; defaults
                to ``bash -l``.
        """
        assert plugin in self.plugins

        self.logger.info(
            f"Executing Buildkite hook for plugin {plugin}: {hook}. "
            f"This pulls a Docker image and could take a while.")

        plugin_dir = self.plugins[plugin]["dir"]
        plugin_env = self.plugins[plugin]["env"].copy()

        if env:
            plugin_env.update(env)

        script_command = script_command or "bash -l"

        hook_script = f"{plugin_dir}/hooks/{hook}"
        self.execute_ssh_command(
            f"[ -f {hook_script} ] && cat {hook_script} | {script_command} ",
            env=plugin_env,
            as_script=False,
            quiet=True,
        )

    def print_buildkite_command(self, skipped: bool = False):
        """Print the job's commands and, optionally, the skipped ones."""
        print("-" * 80)
        print("These are the commands you need to execute to fully reproduce "
              "the run")
        print("-" * 80)
        print(self.env["BUILDKITE_COMMAND"])
        print("-" * 80)

        if skipped and self.skipped_commands:
            print("Some of the commands above have already been run. "
                  "Remaining commands:")
            print("-" * 80)
            print("\n".join(self.skipped_commands))
            print("-" * 80)

    def run_buildkite_command(self,
                              command_filter: Optional[List[str]] = None):
        """Run the job's commands in the container, one line at a time.

        Args:
            command_filter: Regular expressions; a command matching any of
                them is not executed but remembered in
                ``self.skipped_commands``.
        """
        commands = self.env["BUILDKITE_COMMAND"].split("\n")
        regexes = [re.compile(cf) for cf in command_filter or []]

        skipped_commands = []
        for command in commands:
            if any(rx.search(command) for rx in regexes):
                self.logger.info(f"Filtered build command: {command}")
                skipped_commands.append(command)
                continue

            self.logger.info(f"Executing build command: {command}")
            self.execute_docker_command(command)

        self.skipped_commands = skipped_commands

    def transfer_env_to_container(self):
        """Write the job env to ``~/.env`` in the container.

        Also makes sure the container's ``.bashrc`` sources that file.
        """
        escaped = env_str(self.env).replace("'", "'\"'\"'")

        self.execute_docker_command(
            f"grep -q 'source ~/.env' $HOME/.bashrc "
            f"|| echo 'source ~/.env' >> $HOME/.bashrc; "
            f"echo 'export {escaped}' > $HOME/.env",
            quiet=True,
        )

    def attach_to_container(self):
        """Open an interactive shell in the container via the local ssh."""
        self.logger.info("Attaching to AWS instance...")
        ssh_command = (f"ssh -ti {self.ssh_key_file} "
                       f"-o StrictHostKeyChecking=no "
                       f"-o ServerAliveInterval=30 "
                       f"{self.ssh_user}@{self.aws_instance_ip} "
                       f"'docker exec -it ray_container bash -l'")

        # A shell is needed so that "~" in the key file path is expanded.
        subprocess.run(ssh_command, shell=True)
def get_buildkite_api() -> Buildkite:
    """Return a Buildkite client with its access token already set.

    The token is obtained from ``maybe_fetch_buildkite_token``.
    """
    client = Buildkite()
    client.set_access_token(maybe_fetch_buildkite_token())
    return client
TOKEN = os.environ.get('BUILDKITE_TOKEN') if not TOKEN: TOKEN = getpass('Please enter your Buildkite API token.\n' + 'Scope: read_builds, write_builds, read_pipelines.\n' + 'https://buildkite.com/user/api-access-tokens\n> ') if not TOKEN: print('Missing token.') exit(1) return TOKEN TOKEN = get_token() ORG = 'pagerduty' unblock_filename = 'local_unblock_fields.json' bk = Buildkite() bk.set_access_token(TOKEN) def load_json(filename): data = {} with open(filename) as file: data = json.load(file) return data def run_build(build_task): build = start_build(build_task) print('\nStarting build.') time.sleep(5) print('Loading and unblocking build.', end='') build = get_build(build)
import pandas as pd from pybuildkite.buildkite import Buildkite, BuildState import matplotlib.pyplot as plt import cia.plot as plot import cia.utils as utils import cia.filter as bfilter import cia.analysis as analysis from cia.cfg import CFG, TODAY log = logging.getLogger(__name__) BK_CLIENT = Buildkite() BK_CLIENT.set_access_token(os.environ["BUILDKITE_API_TOKEN"]) _PLOTS_FOR_SUBPLOTS = [] def main(): builds = rewrite_build_objects( load_all_builds(CFG().args.org, CFG().args.pipeline, [BuildState.FINISHED])) builds = bfilter.drop_builds_that_did_not_start_or_finish(builds) builds = bfilter.filter_builds_based_on_build_time(builds) build_numbers = sorted([b["number"] for b in builds])