Example #1
def main():
    init_logging()
    logging.info('Worker started')
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGINT, sig_handler)

    while not sigterm.is_set():
        logging.debug('Starting poll cycle')
        try:
            worker_loop()
            req('PUT',
                '%s/workers/%s.json' % (DB, WORKER_NAME),
                body=make_worker_obj('IDLE'))
        except Exception:
            logging.error('Exception in worker loop:\n%s',
                          traceback.format_exc())
        if sigterm.is_set():
            break

        # Synchronize sleeping with the wall clock. This is so all VMs wake up at
        # the same time. See comment on distributing load above in this file.
        poll_time_sec = 5
        time.sleep(poll_time_sec - (time.time() % poll_time_sec))

    # The use case here is the VM being terminated by the GCE infrastructure.
    # We mark the worker as terminated and the job as cancelled so we don't wait
    # forever for it.
    logging.warning('Exiting the worker loop, got signal: %s',
                    sigterm.is_set())
    req('PUT',
        '%s/workers/%s.json' % (DB, WORKER_NAME),
        body=make_worker_obj('TERMINATED'))
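A note on the wall-clock-aligned sleep above: instead of sleeping a flat
poll_time_sec, the loop sleeps until the next multiple of poll_time_sec, so
every VM wakes at the same instant regardless of how long its loop body took.
A minimal standalone sketch of the technique (the helper name is ours, not
from the source):

import time

POLL_TIME_SEC = 5

def sleep_until_next_tick(period_sec=POLL_TIME_SEC):
    # time.time() % period_sec is how far we are past the last period
    # boundary; the difference below is the time left until the next one.
    time.sleep(period_sec - (time.time() % period_sec))

if __name__ == '__main__':
    for _ in range(3):
        sleep_until_next_tick()
        # Every process running this wakes at (roughly) the same boundary.
        print('tick at %.3f' % time.time())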
Example #2
def main():
    init_logging()
    logging.info('Worker started')
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGINT, sig_handler)

    while not sigterm.is_set():
        logging.debug('Starting poll cycle')
        try:
            worker_loop()
            req('PUT',
                '%s/workers/%s.json' % (DB, WORKER_NAME),
                body=make_worker_obj('IDLE'))
        except Exception:
            logging.error('Exception in worker loop:\n%s',
                          traceback.format_exc())
        if sigterm.is_set():
            break
        time.sleep(5)

    # The use case here is the VM being terminated by the GCE infrastructure.
    # We mark the worker as terminated and the job as cancelled so we don't wait
    # forever for it.
    logging.warning('Exiting the worker loop, got signal: %s',
                    sigterm.is_set())
    req('PUT',
        '%s/workers/%s.json' % (DB, WORKER_NAME),
        body=make_worker_obj('TERMINATED'))
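Both examples assume a module-level sigterm event and a sig_handler that are
not shown in the excerpts. A plausible sketch of those helpers, assuming
sigterm is a threading.Event (the names come from the examples; the bodies
are a guess):

import logging
import threading

sigterm = threading.Event()

def sig_handler(sig, _frame):
    # Only flag the main loop to exit; don't exit here, so the final
    # 'TERMINATED' PUT at the end of main() still runs.
    logging.warning('Got signal %d, setting sigterm', sig)
    sigterm.set()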
Example #3
def main():
  init_logging()
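  # No SIGALRM handler is installed, so the default action terminates the
  # process after WATCHDOG_SEC: a watchdog in case the upload hangs.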
  signal.alarm(WATCHDOG_SEC)
  mimetypes.add_type('application/wasm', '.wasm')

  parser = argparse.ArgumentParser()
  parser.add_argument('--rm', action='store_true', help='Removes the directory')
  parser.add_argument('--job-id', type=str, required=True,
                      help='The Perfetto CI job ID to tie this upload to')
  args = parser.parse_args()
  job_id = args.job_id
  dirpath = os.path.join(os.getenv('ARTIFACTS_DIR', default=os.curdir), job_id)
  if not os.path.isdir(dirpath):
    logging.error('Directory not found: %s', dirpath)
    return 1

  total_size = 0
  uploads = 0
  failures = 0
  files = list_files(dirpath)
  pool = ThreadPool(processes=10)
  for upl_size in pool.imap_unordered(upload_one_file_with_retries, files):
    uploads += 1 if upl_size >= 0 else 0
    failures += 1 if upl_size < 0 else 0
    total_size += max(upl_size, 0)

  logging.info('Uploaded artifacts for %s: %d files, %d failures, %d KB',
               job_id, uploads, failures, total_size / 1e3)

  scan_and_upload_perf_folder(job_id, dirpath)

  if args.rm:
    subprocess.call(['sudo', 'rm', '-rf', dirpath])

  return 0
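Example #3 relies on upload_one_file_with_retries returning the uploaded size
on success and a negative value on failure; that contract is implied by the
sign-based accounting in the caller. A hypothetical sketch of a compatible
worker (upload_one_file is a placeholder, not part of the source):

import logging
import os

def upload_one_file(path):
    # Placeholder for the real upload (e.g. a PUT to the artifacts bucket);
    # expected to raise on failure.
    raise NotImplementedError

def upload_one_file_with_retries(path, attempts=3):
    # Returns the file size in bytes on success, -1 on failure, matching how
    # the caller above counts uploads/failures and sums sizes.
    for attempt in range(1, attempts + 1):
        try:
            upload_one_file(path)
            return os.path.getsize(path)
        except Exception:
            logging.exception('Upload attempt %d failed for %s', attempt, path)
    return -1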
Example #4
def main():
  init_logging()
  mimetypes.add_type('application/wasm', '.wasm')
  logging.info('Artifacts uploader started')
  pool = ThreadPool(processes=32)
  while True:
    scan_and_upload_artifacts(pool, remove_after_upload='--rm' in sys.argv)
    time.sleep(RESCAN_PERIOD_SEC)
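The mimetypes.add_type call in Examples #3 and #4 matters if the uploader
derives each file's Content-Type from its extension: browsers will only
stream-compile WebAssembly served as application/wasm. A quick illustration
(older Python versions lack a built-in .wasm mapping):

import mimetypes

# May print (None, None) on Pythons without a built-in .wasm mapping.
print(mimetypes.guess_type('trace_processor.wasm'))
mimetypes.add_type('application/wasm', '.wasm')
print(mimetypes.guess_type('trace_processor.wasm'))  # ('application/wasm', None)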
Example #5
def main(argv):
    init_logging()
    if len(argv) != 2:
        print('Usage: %s job_id' % argv[0])
        return 1

    job_id = argv[1]
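    # Fallback exit code; normally overwritten with the container's exit code
    # in the polling loop below.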
    res = 42

    # The container name will be worker-N-sandbox.
    container = socket.gethostname() + '-sandbox'

    # Remove stale jobs, if any.
    subprocess.call(['sudo', 'docker', 'rm', '-f', container])

    q = queue.Queue()

    # Unlike in native programs, signal handlers in Python aren't truly async;
    # they are queued onto the main thread. Hence we need to keep the main
    # thread responsive to react to signals. This is to handle timeouts and
    # graceful termination of the worker container, which dispatches a SIGTERM
    # on stop.
    def sig_handler(sig, _):
        logging.warning('Job runner got signal %s, terminating job %s', sig,
                        job_id)
        subprocess.call(['sudo', 'docker', 'kill', container])
        # sys.exit() merely raises SystemExit, which can be swallowed;
        # os._exit() terminates the process immediately.
        os._exit(1)

    signal.signal(signal.SIGTERM, sig_handler)

    log_thd = threading.Thread(target=log_thread, args=(job_id, q))
    log_thd.start()

    # SYS_PTRACE is required for gtest death tests and LSan.
    cmd = [
        'sudo', 'docker', 'run', '--name', container, '--hostname', container,
        '--cap-add', 'SYS_PTRACE', '--rm', '--env',
        'PERFETTO_TEST_JOB=%s' % job_id, '--tmpfs', '/tmp:exec'
    ]

    # Propagate environment variables coming from the job config.
    for kv in os.environ.items():
        if kv[0].startswith('PERFETTO_'):
            cmd += ['--env', '%s=%s' % kv]

    # We use the tmpfs mount created by gce-startup-script.sh, if present. The
    # problem is that Docker doesn't allow both overriding the tmpfs size and
    # dropping the default "-o noexec". In turn, the default tmpfs size depends
    # on the host's physical memory size.
    if os.getenv('SANDBOX_TMP'):
        cmd += ['-v', '%s:/ci/ramdisk' % os.getenv('SANDBOX_TMP')]
    else:
        cmd += ['--tmpfs', '/ci/ramdisk:exec']

    # Rationale for the conditional branches below: when running in the real GCE
    # environment, the gce-startup-script.sh mounts these directories in the right
    # locations, so that they are shared between all workers.
    # When running the worker container outside of GCE (i.e. for local testing)
    # we leave these empty. The VOLUME directive in the Dockerfile will cause
    # Docker to automatically mount a scratch volume for those.
    # This is so that the CI containers can be tested without having to do the
    # work that gce-startup-script.sh does.
    if os.getenv('SHARED_WORKER_CACHE'):
        cmd += ['--volume=%s:/ci/cache' % os.getenv('SHARED_WORKER_CACHE')]

    artifacts_dir = None
    if os.getenv('ARTIFACTS_DIR'):
        artifacts_dir = os.path.join(os.getenv('ARTIFACTS_DIR'), job_id)
        subprocess.call(['sudo', 'rm', '-rf', artifacts_dir])
        os.mkdir(artifacts_dir)
        cmd += ['--volume=%s:/ci/artifacts' % artifacts_dir]

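    # Optional extra docker arguments (network configuration) taken from the
    # environment, if set.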
    cmd += os.getenv('SANDBOX_NETWORK_ARGS', '').split()
    cmd += [SANDBOX_IMG]

    logging.info('Starting %s', ' '.join(cmd))
    proc = subprocess.Popen(cmd,
                            stdin=open(os.devnull),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            bufsize=65536)
    stdout = ''
    tstart = time.time()
    while True:
        ms_elapsed = int((time.time() - tstart) * 1000)
        stdout += read_nonblock(proc.stdout)

        # stdout/err pipes are not atomic w.r.t. '\n'. Extract whole lines into
        # |lines| and keep the last partial line (index -1) in the |stdout|
        # buffer.
        lines = stdout.split('\n')
        stdout = lines[-1]
        lines = lines[:-1]

        # Each line has a key of the form <time-from-start><out|err><counter>
        # |counter| is relative to the batch and is only used to disambiguate lines
        # fetched at the same time, preserving the ordering.
        batch = {}
        for counter, line in enumerate(lines):
            batch['%06x-%04x' % (ms_elapsed, counter)] = line
        if batch:
            q.put(batch)
        if proc.poll() is not None:
            res = proc.returncode
            logging.info('Job subprocess terminated with code %s', res)
            break

        # Large sleeps favour batching in the log uploader.
        # Small sleeps favour responsiveness of the signal handler.
        time.sleep(1)

    q.put(None)  # EOF marker.
    log_thd.join()

    if artifacts_dir:
        artifacts_uploader = os.path.join(CUR_DIR, 'artifacts_uploader.py')
        cmd = ['setsid', artifacts_uploader, '--job-id=%s' % job_id, '--rm']
        subprocess.call(cmd)

    return res
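Example #5 leans on a read_nonblock(proc.stdout) helper to drain the child's
output without ever blocking, which keeps the 1-second polling loop (and
hence the signal handler) responsive. The helper is not shown; a plausible
Unix-only sketch, assuming it does a non-blocking read on the pipe's file
descriptor and returns a str:

import fcntl
import os

def read_nonblock(fileobj):
    # Put the underlying fd in non-blocking mode, then read whatever is
    # buffered; return '' when the pipe has no data yet (or at EOF).
    fd = fileobj.fileno()
    fcntl.fcntl(fd, fcntl.F_SETFL, fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NONBLOCK)
    try:
        data = os.read(fd, 65536)
    except BlockingIOError:
        return ''
    return data.decode('utf-8', errors='replace')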