Example no. 1
0
def cmd_build(args, remainder):
    """Run the bazel test/build for the chosen variant and archive the package.

    Args:
        args: parsed CLI namespace; uses variant, version, root, pipeline,
            and build_id.
        remainder: extra CLI arguments; unused here.
    """
    common_args = [
        '--config={}'.format(args.variant),
        '--define=version={}'.format(args.version),
        '--explain={}'.format(os.path.abspath('explain.log')),
        '--verbose_failures',
        '--verbose_explanations',
    ]

    env = None
    if platform.system() == 'Windows':
        # Windows builds need git symlink support and a dedicated conda env.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env = os.environ.copy()
        env.update(cenv.env())

    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)

    # Archive the packaged artifact under the per-build directory tree.
    dest = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )
    os.makedirs(dest, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), dest)
Example no. 2
0
def cmd_build(args, remainder):
    """Build and test the configured variant, upload artifacts, and archive
    the package tarball.

    Args:
        args: parsed CLI namespace; uses variant, version, root, pipeline,
            and build_id.
        remainder: extra CLI arguments; unused here.
    """
    import yaml
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)

    # Overlay the variant's environment settings on the current environment.
    variant = plan['VARIANTS'][args.variant]
    env = os.environ.copy()
    env.update({key: str(value) for key, value in variant['env'].items()})

    util.printf('--- :snake: pre-build steps... ')
    util.printf('delete any old whl files...')
    wheel_dirs = [
        wheel_path(pkg).resolve()
        for pkg in ('plaidml', 'plaidml/keras', 'plaidbench')
    ]
    wheel_clean(wheel_dirs)

    explain_log = 'explain.log'
    profile_json = 'profile.json.gz'
    # The variant may override which bazel config to use.
    bazel_config = variant.get('bazel_config', args.variant)

    common_args = [
        '--config={}'.format(bazel_config),
        '--define=version={}'.format(args.version),
        '--experimental_generate_json_trace_profile',
        '--experimental_json_trace_compression',
        '--experimental_profile_cpu_usage',
        '--explain={}'.format(explain_log),
        '--profile={}'.format(profile_json),
        '--verbose_failures',
        '--verbose_explanations',
    ]

    util.printf('--- :bazel: Running Build...')
    if platform.system() == 'Windows':
        # Windows builds need git symlink support and a dedicated conda env.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env.update(cenv.env())
    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)

    util.printf('--- :buildkite: Uploading artifacts...')
    buildkite_upload(explain_log)
    buildkite_upload(profile_json)
    for wheel_dir in wheel_dirs:
        buildkite_upload('*.whl', cwd=wheel_dir)

    # Archive the packaged artifact under the per-build directory tree.
    dest = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )
    os.makedirs(dest, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), dest)
Example no. 3
0
def cmd_build(args, remainder):
    """Build and test the configured variant, upload artifacts (including the
    wheels bundled inside the package tarball), and archive the tarball.

    Args:
        args: parsed CLI namespace; uses variant, version, root, pipeline,
            and build_id.
        remainder: extra CLI arguments; unused here.
    """
    import yaml
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)

    # Overlay the variant's environment settings on the current environment.
    env = os.environ.copy()
    variant = plan['VARIANTS'][args.variant]
    for key, value in variant['env'].items():
        env[key] = str(value)

    explain_log = 'explain.log'
    profile_json = 'profile.json.gz'
    # The variant may override which bazel config to use.
    bazel_config = variant.get('bazel_config', args.variant)

    common_args = []
    common_args += ['--config={}'.format(bazel_config)]
    common_args += ['--define=version={}'.format(args.version)]
    common_args += ['--experimental_generate_json_trace_profile']
    common_args += ['--experimental_json_trace_compression']
    common_args += ['--experimental_profile_cpu_usage']
    common_args += ['--explain={}'.format(explain_log)]
    common_args += ['--profile={}'.format(profile_json)]
    common_args += ['--verbose_failures']
    common_args += ['--verbose_explanations']

    util.printf('--- :bazel: Running Build...')
    if platform.system() == 'Windows':
        # Windows builds need git symlink support and a dedicated conda env.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env.update(cenv.env())
    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)

    util.printf('--- :buildkite: Uploading artifacts...')
    buildkite_upload(explain_log)
    buildkite_upload(profile_json)

    shutil.rmtree('tmp', ignore_errors=True)
    tarball = os.path.join('bazel-bin', 'pkg.tar.gz')
    with tarfile.open(tarball, 'r') as tar:
        # Extract only the wheel files bundled inside the package tarball.
        wheels = [item for item in tar.getmembers()
                  if item.name.endswith('.whl')]
        # NOTE(review): extractall trusts member paths; acceptable for our
        # own build artifact, but do not reuse on untrusted archives.
        tar.extractall('tmp', members=wheels)
    buildkite_upload('*.whl', cwd='tmp')

    # Archive the packaged artifact under the per-build directory tree.
    archive_dir = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(tarball, archive_dir)
Example no. 4
0
def cmd_build(args, remainder):
    """Clean old wheels, run the bazel test/build, upload wheel artifacts,
    and archive the package tarball.

    Args:
        args: parsed CLI namespace; uses variant, version, root, pipeline,
            and build_id.
        remainder: extra CLI arguments; unused here.
    """
    util.printf('--- :snake: pre-build steps... ')
    util.printf('bazel shutdown...')
    util.check_output(['bazelisk', 'shutdown'])
    util.printf('delete any old whl files...')
    wheel_clean('plaidml')
    wheel_clean('plaidbench')
    wheel_clean('plaidml/keras')

    common_args = []
    common_args += ['--config={}'.format(args.variant)]
    common_args += ['--define=version={}'.format(args.version)]
    common_args += ['--explain={}'.format(os.path.abspath('explain.log'))]
    common_args += ['--verbose_failures']
    common_args += ['--verbose_explanations']

    util.printf('--- :bazel: Running Build ...')

    if platform.system() == 'Windows':
        # Windows builds need git symlink support and a dedicated conda env.
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env = os.environ.copy()
        env.update(cenv.env())
    else:
        env = None

    # NOTE(review): stderr is discarded here, which hides bazel error output
    # on failure — confirm this is intentional before changing.
    util.check_call(['bazelisk', 'test', '...'] + common_args,
                    env=env,
                    stderr=subprocess.DEVNULL)
    archive_dir = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )

    util.printf('--- :buildkite: Uploading artifacts...')
    # One upload per wheel directory, in the original order.
    for pkg in ('plaidml', 'plaidbench', 'plaidml/keras'):
        util.check_call(['buildkite-agent', 'artifact', 'upload', '*.whl'],
                        cwd=wheel_path(pkg).resolve())
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), archive_dir)
    util.printf('bazel shutdown...')
    util.check_output(['bazelisk', 'shutdown'])
Example no. 5
0
def run(args, shargs):
    """Extract the build package, create a conda env, run one test workload,
    and write/copy the resulting report.

    Exits the process with the runner's return code on failure, or 1 when
    the analyzed result is not OK.

    Args:
        args: parsed CLI namespace; uses suite, workload, platform,
            batch_size, root, pipeline, build_id, version, and local.
        shargs: optional sharding arguments forwarded verbatim to the
            runner; shargs[3] is assumed to be the shard identifier —
            TODO confirm against the caller.
    """
    if shargs:
        print('running shard: ', shargs[3])
        # Shard runs get a distinct name so result directories don't collide.
        workload_name = str(args.workload) + '-' + shargs[3]
    else:
        workload_name = args.workload

    root = pathlib.Path('.').resolve() / 'tmp'
    input_dir = root / 'input'
    output_root = root / 'output'
    output = output_root / args.suite / workload_name / args.platform / 'BATCH_SIZE={}'.format(
        args.batch_size)

    with open('ci/plan.yml') as fp:
        plan = yaml.safe_load(fp)

    # Renamed from `platform` to avoid shadowing the platform module.
    platform_info = plan['PLATFORMS'][args.platform]
    variant_name = platform_info['variant']
    variant = plan['VARIANTS'][variant_name]
    arch = variant['arch']

    suites = plan['SUITES']
    suite = suites.get(args.suite)
    if suite is None:
        sys.exit('Invalid suite. Available suites: {}'.format(list(suites)))
    platform_cfg = suite['platforms'][args.platform]

    workload = suite['workloads'].get(args.workload)
    if workload is None:
        sys.exit('Invalid workload. Available workloads: {}'.format(
            list(suite['workloads'])))

    popt = util.PlanOption(suite, workload, args.platform)

    shutil.rmtree(input_dir, ignore_errors=True)
    archive_dir = pathlib.Path(args.root) / args.pipeline / args.build_id
    if args.local:
        # Local runs consume the freshly built package and a dev version.
        pkg_path = pathlib.Path('bazel-bin/pkg.tar.gz')
        outdir = root / 'nas'
        version = '0.0.0.dev0'
    else:
        pkg_path = archive_dir / 'build' / variant_name / 'pkg.tar.gz'
        outdir = archive_dir
        version = args.version

    util.printf('--- Extracting {} -> {}'.format(pkg_path, input_dir))
    with tarfile.open(str(pkg_path), 'r') as tar:
        # NOTE(review): extractall trusts member paths; acceptable for our
        # own build artifact, not for untrusted archives.
        tar.extractall(input_dir)

    shutil.rmtree(output_root, ignore_errors=True)
    output.mkdir(parents=True)

    cwd = popt.get('cwd', '.')
    spec = pathlib.Path(popt.get('conda_env'))

    util.printf('--- :snake: Creating conda env from {}'.format(spec))
    # The base env path is keyed by a hash of the spec file so identical
    # specs are reused across runs on the same agent.
    instance_name = os.getenv('BUILDKITE_AGENT_NAME', 'harness')
    sig = hashlib.md5()
    sig.update(spec.read_bytes())
    base_path = pathlib.Path('~', '.t2', instance_name,
                             sig.hexdigest()).expanduser()

    base_env = util.CondaEnv(base_path)
    base_env.create(spec)
    conda_env = base_env.clone(root / pathlib.Path('cenv'))
    env = os.environ.copy()
    env.update(conda_env.env())

    for whl in popt.get('wheels', []):
        whl_filename = whl.format(arch=arch, version=version)
        conda_env.install(input_dir / whl_filename)

    if 'stripe' in args.platform:
        env['USE_STRIPE'] = '1'
    if 'cuda' in args.platform:
        env['CUDA_VISIBLE_DEVICES'] = buildkite_metadata(
            'CUDA_VISIBLE_DEVICES', '0')
    env['PLAIDML_DEVICE_IDS'] = buildkite_metadata('PLAIDML_DEVICE_IDS')
    env['PLAIDML_EXPERIMENTAL'] = buildkite_metadata('PLAIDML_EXPERIMENTAL',
                                                     '0')

    util.printf(
        '--- :bazel: Running test {suite}/{workload} on {platform}'.format(
            suite=args.suite,
            workload=args.workload,
            platform=args.platform,
        ))

    cmd_args = platform_cfg.get('prepend_args', []) + popt.get(
        'prepend_args', [])
    cmd_args += platform_cfg.get('args', []) + popt.get('args', [])
    cmd_args += platform_cfg.get('append_args', []) + popt.get(
        'append_args', [])

    if shargs:
        cmd_args += shargs

    ctx = dict(
        results=output,
        batch_size=args.batch_size,
        workload=args.workload,
    )
    cmd_args = [str(x).format(**ctx) for x in cmd_args]
    if 'stripe' in args.platform:
        # Stripe runs do not accept this flag; drop it if present.
        try:
            cmd_args.remove('--no-kernel-timing')
        except ValueError:
            pass

    cmd = [popt.get('runner')] + cmd_args
    retcode = util.call(cmd, cwd=cwd, env=env)

    build_url = os.getenv('BUILDKITE_BUILD_URL')
    if build_url:
        build_url = '{}#{}'.format(build_url, os.getenv('BUILDKITE_JOB_ID'))
    else:
        build_url = DEFAULT_BUILD_URL

    gpu_flops = plan['CONST']['gpu_flops']
    baseline_name = plan['CONST']['efficiency_baseline']

    # workload_name already reflects sharding, so a single construction
    # replaces the previous duplicated if/else branches.
    test_info = util.TestInfo(
        (args.suite, suite),
        (workload_name, workload),
        (args.platform, util.Platform(args.platform, gpu_flops)),
        args.batch_size,
    )

    golden_info = util.TestInfo(
        (args.suite, suite),
        (args.workload, workload),
        (baseline_name, util.Platform(baseline_name, gpu_flops)),
        args.batch_size,
    )

    result = analysis.Result(output_root, test_info, golden_info)
    report = {
        'build_url': build_url,
        'compare': result.test_result.compare,
        'efficiency': result.efficiency,
        'errors': result.test_result.errors,
        'failures': result.test_result.failures,
        'ratio': result.ratio,
        'reason': result.test_result.reason(),
        'status': result.test_result.status(),
        'compile_duration': result.cur.compile_duration,
        'cur.execution_duration': result.cur.execution_duration,
        'ref.execution_duration': result.ref.execution_duration,
    }

    with (output / 'report.json').open('w') as fp:
        json.dump(report, fp)

    # Publish the whole output tree next to the archived build.
    copy_tree(str(output_root), str(outdir / 'test'))

    if retcode:
        sys.exit(retcode)
    if not result.test_result.is_ok():
        sys.exit(1)
Example no. 6
0
def run(args, remainder):
    """Download/extract the build package, create a conda env, run one test
    workload (optionally sharded), and write a report.

    Exits the process with the runner's return code on failure, or 1 when
    the analyzed result is not OK.

    Args:
        args: parsed CLI namespace; uses suite, workload, platform,
            batch_size, version, local, shard, and shard_count.
        remainder: extra CLI arguments; unused here.
    """
    util.verbose = True
    util.printf('args:', args)
    if args.shard_count:
        util.printf('running shard:', args.shard)

    with open('ci/plan.yml') as fp:
        plan = yaml.safe_load(fp)

    gpu_flops = plan['CONST']['gpu_flops']
    baseline_name = plan['CONST']['efficiency_baseline']
    # Renamed from `platform` to avoid shadowing the platform module.
    platform_info = plan['PLATFORMS'][args.platform]
    variant_name = platform_info['variant']
    variant = plan['VARIANTS'][variant_name]
    arch = variant['arch']

    suites = plan['SUITES']
    suite = suites.get(args.suite)
    if suite is None:
        sys.exit('Invalid suite. Available suites: {}'.format(list(suites)))
    platform_cfg = suite['platforms'][args.platform]

    workload = suite['workloads'].get(args.workload)
    if workload is None:
        sys.exit('Invalid workload. Available workloads: {}'.format(
            list(suite['workloads'])))

    popt = util.PlanOption(suite, workload, args.platform)
    test_info = util.TestInfo(
        suite=(args.suite, suite),
        workload=(args.workload, workload),
        platform=(args.platform, util.Platform(args.platform, gpu_flops)),
        batch_size=args.batch_size,
        variant=variant,
        popt=popt,
        shard_id=args.shard,
        shards=args.shard_count,
    )

    root = pathlib.Path('tmp').resolve()
    input_dir = root / 'input'
    output_root = root / 'test'
    output = test_info.path(output_root)

    shutil.rmtree(input_dir, ignore_errors=True)
    if args.local:
        # Local runs consume the freshly built package and a dev version.
        pkg_path = pathlib.Path('bazel-bin/pkg.tar.gz')
        outdir = root / 'nas'
        version = '0.0.0.dev0'
    else:
        archive_path = os.path.join('tmp', 'build', variant_name, 'pkg.tar.gz')
        util.buildkite_download(archive_path, '.')
        pkg_path = root / 'build' / variant_name / 'pkg.tar.gz'
        outdir = root
        version = args.version

    util.printf('--- Extracting {} -> {}'.format(pkg_path, input_dir))
    with tarfile.open(str(pkg_path), 'r') as tar:
        # NOTE(review): extractall trusts member paths; acceptable for our
        # own build artifact, not for untrusted archives.
        tar.extractall(input_dir)

    shutil.rmtree(output_root, ignore_errors=True)
    output.mkdir(parents=True)

    cwd = popt.get('cwd', '.')
    spec = pathlib.Path(popt.get('conda_env'))

    util.printf('--- :snake: Creating conda env from {}'.format(spec))
    # The base env path is keyed by a hash of the spec file so identical
    # specs are reused across runs on the same agent.
    instance_name = os.getenv('BUILDKITE_AGENT_NAME', 'harness')
    sig = hashlib.md5()
    sig.update(spec.read_bytes())
    base_path = pathlib.Path('~', '.t2', instance_name,
                             sig.hexdigest()).expanduser()

    base_env = util.CondaEnv(base_path)
    base_env.create(spec)
    conda_env = base_env.clone(root / pathlib.Path('cenv'))
    env = os.environ.copy()
    env.update(conda_env.env())

    for whl in popt.get('wheels', []):
        whl_filename = whl.format(arch=arch, version=version)
        conda_env.install(input_dir / whl_filename)

    env['PLAIDML_USE_STRIPE'] = '1' if 'stripe' in args.platform else '0'
    if 'cuda' in args.platform:
        # NOTE(review): 'CUDA_CUDA_DEVICE_ORDER' looks like a typo for
        # 'CUDA_DEVICE_ORDER' — confirm against the metadata producers
        # before changing, since the key is read at runtime.
        env['CUDA_DEVICE_ORDER'] = buildkite_metadata('CUDA_CUDA_DEVICE_ORDER',
                                                      'PCI_BUS_ID')
        env['CUDA_VISIBLE_DEVICES'] = buildkite_metadata(
            'CUDA_VISIBLE_DEVICES', '0')
    env['PLAIDML_DEVICE_IDS'] = buildkite_metadata('PLAIDML_DEVICE_IDS')
    env['PLAIDML_EXPERIMENTAL'] = buildkite_metadata('PLAIDML_EXPERIMENTAL',
                                                     '0')
    device = buildkite_metadata('PLAIDML_DEVICE')
    target = buildkite_metadata('PLAIDML_TARGET')
    if device is not None:
        env['PLAIDML_DEVICE'] = device
    if target is not None:
        env['PLAIDML_TARGET'] = target

    util.printf(
        '--- :bazel: Running test {suite}/{workload} on {platform}'.format(
            suite=args.suite,
            workload=args.workload,
            platform=args.platform,
        ))

    cmd_args = platform_cfg.get('prepend_args', []) + popt.get(
        'prepend_args', [])
    cmd_args += platform_cfg.get('args', []) + popt.get('args', [])
    cmd_args += platform_cfg.get('append_args', []) + popt.get(
        'append_args', [])

    if args.shard_count:
        cmd_args += ['--shard', str(args.shard)]
        cmd_args += ['--shard-count', str(args.shard_count)]

    ctx = dict(
        results=output,
        batch_size=args.batch_size,
        workload=args.workload,
    )
    cmd_args = [str(x).format(**ctx) for x in cmd_args]
    if 'stripe' in args.platform:
        # Stripe runs do not accept this flag; drop it if present.
        try:
            cmd_args.remove('--no-kernel-timing')
        except ValueError:
            pass

    # Resolve the runner against the conda env's PATH so the env's binary wins.
    runner = shutil.which(popt.get('runner'), path=env['PATH'])
    cmd = [runner] + cmd_args
    retcode = util.call(cmd, cwd=cwd, env=env)

    build_url = os.getenv('BUILDKITE_BUILD_URL')
    if build_url:
        build_url = '{}#{}'.format(build_url, os.getenv('BUILDKITE_JOB_ID'))
    else:
        build_url = DEFAULT_BUILD_URL

    golden_info = util.TestInfo(
        suite=(args.suite, suite),
        workload=(args.workload, workload),
        platform=(baseline_name, util.Platform(baseline_name, gpu_flops)),
        batch_size=args.batch_size,
        variant=variant,
        popt=popt,
    )

    result = analysis.Result(output_root, test_info, golden_info)
    report = {
        'build_url': build_url,
        'compare': result.test_result.compare,
        'efficiency': result.efficiency,
        'errors': result.test_result.errors,
        'failures': result.test_result.failures,
        'ratio': result.ratio,
        'reason': result.test_result.reason(),
        'status': result.test_result.status(),
        'compile_duration': result.cur.compile_duration,
        'cur.execution_duration': result.cur.execution_duration,
        'ref.execution_duration': result.ref.execution_duration,
    }

    with (output / 'report.json').open('w') as fp:
        json.dump(report, fp)

    if retcode:
        sys.exit(retcode)
    if not result.test_result.is_ok():
        sys.exit(1)