Example #1
def CreatePackageArchiveSidecar(archive_path: pathlib.Path,
                                manifest: dpack_pb2.DataPackage,
                                sidecar_path: pathlib.Path) -> None:
    """Create a sidecar manifest to accompany an archive.

  Args:
    archive_path: The path of the archive tarball.
    manifest: A DataPackage manifest instance.
    sidecar_path: The path of the sidecar to create

  Raises:
    OSError: If sidecar_path already exists, or archive_path does not.
  """
    if sidecar_path.exists():
        raise OSError(f'Refusing to overwrite {sidecar_path}.')
    if not archive_path.is_file():
        raise OSError(f'Archive {archive_path} does not exist')

    sidecar = dpack_pb2.DataPackage()
    sidecar.CopyFrom(manifest)
    # Clear the file attributes. Only the file names and comments are stored in the sidecar.
    for f in sidecar.file:
        if not f.comment:
            f.ClearField("comment")
        f.ClearField("size_in_bytes")
        f.ClearField("checksum_hash")
        f.ClearField("checksum")
    sidecar.checksum_hash = dpack_pb2.SHA256
    sidecar.checksum = crypto.sha256_file(archive_path)
    pbutil.ToFile(sidecar, sidecar_path)
    logging.info('Wrote %s', sidecar_path.absolute())
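CreatePackageArchiveSidecar() only reads archive_path in order to checksum it, so the archive must already exist on disk. A minimal call sketch, with hypothetical paths (pbutil and dpack_pb2 as imported by the example above):

import pathlib

# Hypothetical paths: a manifest written earlier and a tarball next to it.
manifest = pbutil.FromFile(pathlib.Path('package/MANIFEST.pbtxt'),
                           dpack_pb2.DataPackage())
CreatePackageArchiveSidecar(
    archive_path=pathlib.Path('package.tar.bz2'),
    manifest=manifest,
    sidecar_path=pathlib.Path('package.tar.bz2.sidecar.pbtxt'))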
Example #2
def test_ToFile_path_is_directory(suffix):
    """Test that IsADirectoryError raised if path is a directory."""
    with tempfile.TemporaryDirectory(suffix=suffix) as d:
        proto = test_protos_pb2.TestMessage(string='abc', number=1)
        with pytest.raises(IsADirectoryError) as e_info:
            pbutil.ToFile(proto, pathlib.Path(d))
        assert str(e_info.value).endswith(f"Is a directory: '{d}'")
Example #3
def test_ToFile_message_missing_required_fields(suffix):
    """Test that EncodeError is raised if required field is not set."""
    with tempfile.NamedTemporaryFile(prefix='labm8_proto_',
                                     suffix=suffix) as f:
        proto = test_protos_pb2.TestMessage(number=1)
        with pytest.raises(pbutil.EncodeError):
            pbutil.ToFile(proto, pathlib.Path(f.name))
Example #4
def test_ToFile_parent_directory_does_not_exist(suffix):
    """Test that FileNotFoundError raised if parent directory doesn't exist."""
    with tempfile.TemporaryDirectory() as d:
        proto = test_protos_pb2.TestMessage(string='abc', number=1)
        with pytest.raises(FileNotFoundError):
            pbutil.ToFile(proto,
                          pathlib.Path(d) / 'notadir' / f'proto{suffix}')
Example #5
def _CreateTestRepo(root_dir: pathlib.Path, owner: str, name: str) -> None:
  """Create an empty repo for testing indexers."""
  owner_name = f'{owner}_{name}'
  (root_dir / owner_name / '.git').mkdir(parents=True)
  (root_dir / owner_name / 'src').mkdir(parents=True)
  pbutil.ToFile(scrape_repos_pb2.GitHubRepoMetadata(owner=owner, name=name),
                root_dir / f'{owner_name}.pbtxt')
Example #6
def ExportOpenCLResults(cursor, start_id, proto_dir):
    batch_size = 1000
    result_id = start_id
    while True:
        logging.info('Exporting batch of %s results',
                     humanize.intcomma(batch_size))
        cursor.execute(
            """
SELECT
  results.id,
  assertions.assertion,
  results.outcome,
  programs.src
FROM results
LEFT JOIN testbeds ON results.testbed_id = testbeds.id
LEFT JOIN platforms ON testbeds.platform_id = platforms.id
LEFT JOIN testcases ON results.testcase_id = testcases.id
LEFT JOIN programs ON testcases.program_id = programs.id
LEFT JOIN stderrs ON results.stderr_id = stderrs.id
LEFT JOIN assertions ON stderrs.assertion_id = assertions.id
WHERE results.id >= %s
AND programs.generator = 1
AND testbeds.id = (
  SELECT testbeds.id
    FROM testbeds
    LEFT JOIN platforms ON testbeds.platform_id=platforms.id
  WHERE platform = 'clang'
  AND driver = '3.6.2'
)
ORDER BY results.id
LIMIT %s
""", (result_id, batch_size))
        i = 0
        for row in cursor:
            i += 1
            (
                result_id,
                assertion_text,
                outcome_num,
                program_src,
            ) = row

            outcome = fish_pb2.CompilerCrashDiscriminatorTrainingExample.Outcome.Name(
                outcome_num).lower()
            proto = fish_pb2.CompilerCrashDiscriminatorTrainingExample(
                src=program_src,
                outcome=outcome_num,
                raised_assertion=True if assertion_text else False,
                assertion_name=(GetClangAssertionStub(assertion_text)
                                if assertion_text else ''))
            pbutil.ToFile(proto,
                          proto_dir / outcome / (str(result_id) + '.pbtxt'))

        # If we received fewer results than the requested batch size, then we
        # have run out of data.
        if i < batch_size:
            return
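ExportOpenCLResults() pages through the results table by id rather than OFFSET, stopping when a batch comes back short. A minimal sketch of the same keyset-pagination pattern, with a hypothetical fetch(min_id, limit) callable standing in for the cursor and SQL query:

import typing

def ProcessAllRows(fetch: typing.Callable, start_id: int,
                   batch_size: int = 1000) -> None:
    """Keyset pagination: each batch resumes after the last id seen."""
    next_id = start_id
    while True:
        # fetch() is assumed to return rows ordered by ascending id.
        rows = fetch(next_id, batch_size)
        for row_id, *rest in rows:
            # Resume strictly after this row; querying with >= instead
            # would re-fetch the boundary row.
            next_id = row_id + 1
        # A short batch means the table is exhausted.
        if len(rows) < batch_size:
            return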
Example #7
def _create_lock():
    lockfile = lockfile_pb2.LockFile(
        owner_process_id=os.getpid() if pid is None else pid,
        owner_process_argv=' '.join(sys.argv),
        date_acquired_utc_epoch_ms=labdate.MillisecondsTimestamp(
            labdate.GetUtcMillisecondsNow()),
        owner_hostname=system.HOSTNAME,
        owner_user=system.USERNAME)
    pbutil.ToFile(lockfile, self.path, assume_filename='LOCK.pbtxt')
Example #8
def test_FromFile_required_fields_not_set_uninitialized_okay(suffix):
    """Test that DecodeError not raised if required fields not set."""
    with tempfile.NamedTemporaryFile(prefix='labm8_proto_',
                                     suffix=suffix) as f:
        proto_in = test_protos_pb2.AnotherTestMessage(number=1)
        pbutil.ToFile(proto_in, pathlib.Path(f.name))
        pbutil.FromFile(pathlib.Path(f.name),
                        test_protos_pb2.TestMessage(),
                        uninitialized_okay=True)
Example #9
def test_FromFile_required_fields_not_set(suffix):
    """Test that DecodeError raised if required fields not set."""
    with tempfile.NamedTemporaryFile(prefix='labm8_proto_',
                                     suffix=suffix) as f:
        pbutil.ToFile(test_protos_pb2.AnotherTestMessage(number=1),
                      pathlib.Path(f.name))
        with pytest.raises(pbutil.DecodeError) as e_info:
            pbutil.FromFile(pathlib.Path(f.name),
                            test_protos_pb2.TestMessage())
        assert f"Required fields not set: '{f.name}'" == str(e_info.value)
Example #10
  def EpochEndCallback(self, epoch: int, loss: float):
      now = labdate.MillisecondsTimestamp()
      epoch_time_ms = now - self.last_epoch_begin_timestamp
      telemetry = telemetry_pb2.ModelEpochTelemetry(
          timestamp_utc_epoch_ms=now,
          epoch_num=epoch,
          epoch_wall_time_ms=epoch_time_ms,
          loss=loss,
      )
      pbutil.ToFile(telemetry,
                    self.logdir / f'epoch_{epoch:03d}_telemetry.pbtxt')
Example #11
def test_ImportFromLanguage_Java_repo(tempdir: pathlib.Path):
  """An end-to-end test of a Java importer."""
  (tempdir / 'src').mkdir()
  (tempdir / 'src' / 'Owner_Name' / '.git').mkdir(parents=True)
  (tempdir / 'src' / 'Owner_Name' / 'src').mkdir(parents=True)

  # A repo will only be imported if there is a repo meta file.
  pbutil.ToFile(scrape_repos_pb2.GitHubRepoMetadata(
      owner='Owner',
      name='Name'),
      tempdir / 'src' / 'Owner_Name.pbtxt')

  # Create some files in our test repo.
  with open(tempdir / 'src' / 'Owner_Name' / 'src' / 'A.java', 'w') as f:
    f.write("""
public class A {
  public static void helloWorld() {
    System.out.println("Hello, world!");
  }
}
""")
  with open(tempdir / 'src' / 'Owner_Name' / 'src' / 'B.java', 'w') as f:
    f.write("""
public class B {
  private static int foo() {return 5;}
}
""")
  with open(tempdir / 'src' / 'Owner_Name' / 'README.txt', 'w') as f:
    f.write('Hello, world!')

  language = scrape_repos_pb2.LanguageToClone(
      language='foolang',
      query=[],
      destination_directory=str(tempdir / 'src'),
      importer=[
        scrape_repos_pb2.ContentFilesImporterConfig(
            source_code_pattern='.*\\.java',
            preprocessor=["datasets.github.scrape_repos.preprocessors."
                          "extractors:JavaMethods"]),
      ]
  )
  indexer.ImportFromLanguage(language, multiprocessing.Pool(1))

  test_repo = github_repo.GitHubRepo(tempdir / 'src' / 'Owner_Name.pbtxt')
  assert (test_repo.index_dir / 'DONE.txt').is_file()
  assert len(list(test_repo.index_dir.iterdir())) == 3
  contentfiles = list(test_repo.ContentFiles())
  assert len(contentfiles) == 2
  assert set([cf.text for cf in contentfiles]) == {
    ('public static void helloWorld(){\n'
     '  System.out.println("Hello, world!");\n}\n'),
    'private static int foo(){\n  return 5;\n}\n',
  }
Example #12
def test_ToFile_FromFile_equivalence(suffix):
    """Test that ToFile() and FromFile() are symmetrical."""
    with tempfile.TemporaryDirectory(prefix='labm8_proto_') as d:
        path = pathlib.Path(d) / f'proto{suffix}'
        proto_in = test_protos_pb2.TestMessage(string='abc', number=1)
        pbutil.ToFile(proto_in, path)
        assert path.is_file()
        proto_out = test_protos_pb2.TestMessage()
        pbutil.FromFile(path, proto_out)
        assert proto_out.string == 'abc'
        assert proto_out.number == 1
        assert proto_in == proto_out
Example #13
def main(argv):
  if len(argv) > 1:
    unknown_args = ', '.join(argv[1:])
    raise app.UsageError(f"Unknown arguments {unknown_args}")

  logging.info('Preparing OpenCL testbed.')
  config = harness_pb2.CldriveHarness()
  config.opencl_env.extend([env.OclgrindOpenCLEnvironment().name])
  config.opencl_opt.extend([FLAGS.opencl_opt])
  harness = cldrive.CldriveHarness(config)
  assert len(harness.testbeds) >= 1

  input_directories = FLAGS.input_directories
  logging.info('Reading testcases from: %s', ' '.join(input_directories))

  output_directory = pathlib.Path(FLAGS.output_directory)
  logging.info('Writing results to %s', output_directory)
  output_directory.mkdir(parents=True, exist_ok=True)

  # Load testcases.
  testcase_dirs = [
    pathlib.Path(x) for x in input_directories if
    pathlib.Path(x).is_dir()]
  if not testcase_dirs:
    raise app.UsageError('No --input_directories found.')
  testcase_paths = labtypes.flatten(
      [[pathlib.Path(y) for y in fs.ls(x, abspaths=True)]
       for x in testcase_dirs])
  testcases = [
    pbutil.FromFile(path, deepsmith_pb2.Testcase()) for path in testcase_paths]
  logging.info('Read %d testcases.', len(testcases))
  if not testcases:
    raise app.UsageError(
        "No testcases found: '{}'".format(' '.join(input_directories)))

  # Execute testcases.
  req = harness_pb2.RunTestcasesRequest()
  req.testbed.CopyFrom(harness.testbeds[0])
  req.testcases.extend(testcases)
  res = harness.RunTestcases(req, None)

  # Write results to file.
  for testcase, result in zip(testcases, res.results):
    result_id = crypto.md5_str(str(testcase))
    pbutil.ToFile(result, output_directory / f'{result_id}.pbtxt')

  logging.info('Executed %d testcases and wrote results to %s',
               len(res.results), output_directory)
  execution_times = [
    result.profiling_events[0].duration_ms for result in res.results]
  logging.info('Average time to evaluate testcase: %.2f ms',
               sum(execution_times) / len(execution_times))
Example #14
def InitManifest(package_dir: pathlib.Path,
                 contents: typing.List[pathlib.Path], update: bool) -> None:
    """Write the MANIFEST.pbtxt file for a package."""
    manifest = CreatePackageManifest(package_dir, contents)
    manifest_path = package_dir / 'MANIFEST.pbtxt'
    if update and pbutil.ProtoIsReadable(manifest_path,
                                         dpack_pb2.DataPackage()):
        old = pbutil.FromFile(manifest_path, dpack_pb2.DataPackage())
        MergeManifests(manifest, old)
    elif manifest_path.is_file():
        raise OSError('Refusing to overwrite MANIFEST.pbtxt file.')
    pbutil.ToFile(manifest, manifest_path)
    logging.info('Wrote %s', manifest_path.absolute())
Example #15
def TestingLoop(min_interesting_results: int,
                max_testing_time_seconds: int,
                batch_size: int,
                generator: base_generator.GeneratorServiceBase,
                dut_harness: base_harness.HarnessBase,
                gs_harness: base_harness.HarnessBase,
                filters: difftests.FiltersBase,
                interesting_results_dir: pathlib.Path,
                start_time: float = None) -> None:
    """The main fuzzing loop.

  Args:
    min_interesting_results: The minimum number of interesting results to find.
    max_testing_time_seconds: The maximum time allowed to find interesting
      results.
    batch_size: The number of testcases to generate and execute in each batch.
    generator: A testcase generator.
    dut_harness: The device under test.
    gs_harness: The device to compare outputs against.
    filters: A filters instance for testcases.
    interesting_results_dir: The directory to write interesting results to.
    start_time: The starting time, as returned by time.time(). If not provided,
      the starting time will be the moment that this function is called. Set
      this value if you would like to include initialization overhead in the
      calculated testing time.
  """
    start_time = start_time or time.time()
    interesting_results_dir.mkdir(parents=True, exist_ok=True)
    num_interesting_results = 0
    batch_num = 0
    while (num_interesting_results < min_interesting_results
           and time.time() < start_time + max_testing_time_seconds):
        batch_num += 1
        logging.info('Starting generate / test / eval batch %d ...', batch_num)
        interesting_results = RunBatch(generator, dut_harness, gs_harness,
                                       filters, batch_size)
        num_interesting_results += len(interesting_results)
        for result in interesting_results:
            pbutil.ToFile(
                result, interesting_results_dir /
                (str(labdate.MillisecondsTimestamp()) + '.pbtxt'))

    logging.info(
        'Stopping after %.2f seconds and %s batches (%.0fms / testcase).\n'
        'Found %s interesting results.',
        time.time() - start_time, humanize.intcomma(batch_num),
        (((time.time() - start_time) / (batch_num * batch_size)) * 1000),
        num_interesting_results)
    logging.flush()
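As the start_time documentation notes, capturing time.time() before constructing the generator and harnesses charges their initialization overhead against the testing budget. A hypothetical invocation sketch (the Make* helpers are placeholders, not real APIs, and it is assumed that difftests.FiltersBase can be instantiated directly):

import pathlib
import time

start = time.time()                     # capture before any expensive setup
generator = MakeGenerator()             # hypothetical setup helper
dut_harness = MakeDutHarness()          # hypothetical
gs_harness = MakeGoldStandardHarness()  # hypothetical
TestingLoop(min_interesting_results=10,
            max_testing_time_seconds=3600,
            batch_size=100,
            generator=generator,
            dut_harness=dut_harness,
            gs_harness=gs_harness,
            filters=difftests.FiltersBase(),
            interesting_results_dir=pathlib.Path('interesting_results'),
            start_time=start)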
Example #16
  def MakeRepositoryMetas(self,
                          repos: typing.List[Repository.Repository]) -> None:
    """Make meta files for a list of repositories.

    Args:
      repos: A list of GitHub Repository instances.
    """
    logging.debug('Scraping %s repositories', humanize.intcomma(len(repos)))
    for repo in repos:
      self.i += 1
      concat_name = '_'.join([repo.owner.login, repo.name])
      clone_dir = self.destination_directory / concat_name
      meta_path = pathlib.Path(str(clone_dir) + '.pbtxt')
      if not pbutil.ProtoIsReadable(meta_path,
                                    scrape_repos_pb2.GitHubRepoMetadata()):
        meta = GetRepositoryMetadata(repo)
        logging.debug('%s', meta)
        pbutil.ToFile(meta, meta_path)
Example #17
def SampleModel(instance: clgen.Instance) -> None:
    """Take --output_corpus_size samples from model."""
    logging.info('Training and sampling the CLgen model ...')
    target_samples = FLAGS.output_corpus_size
    sample_dir = instance.model.SamplerCache(instance.sampler)
    sample_dir.mkdir(exist_ok=True)
    num_samples = len(list(sample_dir.iterdir()))
    logging.info('Need to generate %d samples in %s',
                 max(target_samples - num_samples, 0), sample_dir)
    if num_samples < target_samples:
        sample_lock = lockfile.LockFile(sample_dir / 'LOCK')
        with sample_lock.acquire(replace_stale=True, block=True):
            num_samples = len(list(sample_dir.iterdir()))
            while num_samples < target_samples:
                samples = instance.model.SampleFast(
                    instance.sampler, target_samples - num_samples)
                for sample in samples:
                    sample_id = crypto.sha256_str(sample.text)
                    pbutil.ToFile(sample, sample_dir / f'{sample_id}.pbtxt')
                num_samples = len(list(sample_dir.iterdir()))
Example #18
def IndexContentFiles(job: scrape_repos_pb2.ImportWorker) -> None:
  """Index content files."""
  relpath = job.abspath[len(str(job.clone_dir)) + 1:]
  try:
    texts = preprocessors.Preprocess(pathlib.Path(job.clone_dir), relpath,
                                     job.all_files_relpaths, job.preprocessors)
    for i, text in enumerate(texts):
      sha256 = hashlib.sha256(text.encode('utf-8'))
      proto = scrape_repos_pb2.ContentFile(
          clone_from_url=job.clone_from_url,
          relpath=relpath,
          artifact_index=i,
          sha256=sha256.digest(),
          charcount=len(text),
          linecount=len(text.split('\n')),
          text=text)
      path = pathlib.Path(job.index_dir) / (
          binascii.hexlify(proto.sha256).decode('utf-8') + '.pbtxt')
      pbutil.ToFile(proto, path)
  except UnicodeDecodeError:
    logging.warning('Failed to decode %s', relpath)
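A note on the naming scheme above: binascii.hexlify(h.digest()).decode('utf-8') is equivalent to h.hexdigest(), so each ContentFile is written to a content-addressed path derived from its text. A quick self-contained check:

import binascii
import hashlib

h = hashlib.sha256('hello'.encode('utf-8'))
assert binascii.hexlify(h.digest()).decode('utf-8') == h.hexdigest()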
Example #19
def GenerateTestcases(generator_config: generator_pb2.ClgenGenerator,
                      output_directory: pathlib.Path,
                      num_testcases: int) -> None:
    logging.info('Writing output to %s', output_directory)
    (output_directory / 'generated_kernels').mkdir(parents=True, exist_ok=True)
    (output_directory / 'generated_testcases').mkdir(parents=True,
                                                     exist_ok=True)

    logging.info('Preparing test case generator.')
    generator = clgen.ClgenGenerator(generator_config)

    # Generate testcases.
    logging.info('Generating %d testcases ...', num_testcases)
    req = generator_pb2.GenerateTestcasesRequest()
    req.num_testcases = num_testcases
    res = generator.GenerateTestcases(req, None)

    for testcase in res.testcases:
        # Write kernel to file.
        kernel = testcase.inputs['src']
        kernel_id = crypto.md5_str(kernel)
        with open(output_directory / 'generated_kernels' / f'{kernel_id}.cl',
                  'w') as f:
            f.write(kernel)

        # Write testcase to file.
        testcase_id = crypto.md5_str(str(testcase))
        pbutil.ToFile(
            testcase,
            output_directory / 'generated_testcases' / f'{testcase_id}.pbtxt')

    logging.info('%d testcases written to %s', num_testcases,
                 output_directory / 'generated_testcases')
    generation_times = [
        testcase.profiling_events[0].duration_ms for testcase in res.testcases
    ]
    logging.info('Average time to generate testcase: %.2f ms',
                 sum(generation_times) / len(generation_times))
Example #20
def DifftestTestcase(s: db.session_t, t: testcase.Testcase,
                     outdir: pathlib.Path) -> None:
    """Difftest a testcase."""
    results = list(s.query(result.Result).filter(result.Result.testcase == t))
    for r in results:
        r.output_class = GetResultOutputClass(r)
    majority = GetMajorityOutput(results)

    def OutputPath(result_class: str) -> pathlib.Path:
        try:
            if r.testbed.opts['opencl_opt'] == 'enabled':
                opt = '+'
            elif r.testbed.opts['opencl_opt'] == 'disabled':
                opt = '-'
            else:
                raise KeyError
        except KeyError:
            raise LookupError(str(r.testbed))
        testbeds = sorted(x[0] for x in s.query(testbed.Testbed.name))
        dir = outdir / result_class / str(testbeds.index(r.testbed.name)) / opt
        dir.mkdir(parents=True, exist_ok=True)
        return dir / (str(r.id) + '.pbtxt')

    for r in results:
        if r.output_class == 'Build crash':
            pbutil.ToFile(r.ToProto(), OutputPath('bc'))
        elif r.output_class == 'Build timeout':
            pbutil.ToFile(r.ToProto(), OutputPath('bto'))
        elif (majority.majority_outcome == 'Pass'
              and r.output_class == 'Build failure'):
            pbutil.ToFile(r.ToProto(), OutputPath('abf'))
        elif (majority.majority_outcome == 'Pass'
              and r.output_class == 'Runtime crash'):
            pbutil.ToFile(r.ToProto(), OutputPath('arc'))
        elif (r.outputs['stdout'] != majority.majority_stdout
              and majority.majority_outcome == 'Pass'
              and majority.stdout_majority_size >= math.ceil(
                  2 * majority.outcome_majority_size / 3)):
            pbutil.ToFile(r.ToProto(), OutputPath('awo'))
        else:
            pbutil.ToFile(r.ToProto(), OutputPath('pass'))
Example #21
    def Sample(self,
               sampler: samplers.Sampler,
               min_num_samples: int,
               seed: int = None) -> typing.List[model_pb2.Sample]:
        """Sample a model.

    If the model is not already trained, calling Sample() first trains the
    model. Thus a call to Sample() is equivalent to calling Train() then
    Sample().

    Args:
      sampler: The sampler to sample using.
      min_num_samples: The minimum number of samples to return. Note that the
        true number of samples returned may be higher than this value, as
        sampling occurs in batches. The model will continue producing samples
        until the lowest mulitple of the sampler batch size property that is
        larger than this value. E.g. if min_num_samples is 7 and the Sampler
        batch size is 10, 10 samples will be returned.
      seed: A numeric value to seed the RNG with. If not present, the RNG is
        seeded randomly.

    Returns:
      A list of Sample protos.

    Raises:
      UnableToAcquireLockError: If the model is locked (i.e. there is another
        process currently modifying the model).
      InvalidStartText: If the sampler start text cannot be encoded.
      InvalidSymtokTokens: If the sampler symmetrical depth tokens cannot be
        encoded.
    """
        self.Train()

        sample_count = 1
        self.SamplerCache(sampler).mkdir(exist_ok=True)
        with logutil.TeeLogsToFile(f'sampler_{sampler.hash}',
                                   self.cache.path / 'logs'):
            logging.info("Sampling: '%s'", sampler.start_text)
            if min_num_samples < 0:
                logging.warning(
                    'Entering an infinite sample loop, this process will never end!'
                )
            sample_start_time = labdate.MillisecondsTimestamp()

            atomizer = self.corpus.atomizer
            sampler.Specialize(atomizer)
            batch_size = self.backend.InitSampling(sampler, seed)

            samples = []
            sample_dir = self.SamplerCache(sampler)

            # Per-sample batch outer loop. Continues until we have as many samples
            # as we want.
            while True:
                samples_in_progress = [
                    sampler.tokenized_start_text.copy()
                    for _ in range(batch_size)
                ]
                done = np.zeros(batch_size, dtype=bool)
                start_time = labdate.MillisecondsTimestamp()
                wall_time_start = start_time

                self.backend.InitSampleBatch(sampler, batch_size)

                # Sampling loop. Continues until all samples in the batch are done.
                while True:
                    indices = self.backend.SampleNextIndices(
                        sampler, batch_size)

                    # Iterate over all samples in batch to determine whether they're
                    # done.
                    for i in range(batch_size):
                        if done[i]:
                            continue

                        token = atomizer.decoder[indices[i]]
                        samples_in_progress[i].append(token)
                        if sampler.SampleIsComplete(samples_in_progress[i]):
                            end_time = labdate.MillisecondsTimestamp()
                            done[i] = 1
                            sample = model_pb2.Sample(
                                text=''.join(samples_in_progress[i]),
                                sample_start_epoch_ms_utc=start_time,
                                sample_time_ms=end_time - start_time,
                                wall_time_ms=end_time - wall_time_start,
                                num_tokens=len(samples_in_progress[i]))
                            print(f'=== BEGIN CLGEN SAMPLE {sample_count} '
                                  f'===\n\n{sample.text}\n')
                            sample_count += 1
                            sample_id = crypto.sha256_str(sample.text)
                            sample_path = sample_dir / f'{sample_id}.pbtxt'
                            pbutil.ToFile(sample, sample_path)
                            if min_num_samples > 0:
                                samples.append(sample)
                            wall_time_start = labdate.MillisecondsTimestamp()

                    # Complete the batch.
                    if done.all():
                        break

                # Complete sampling. Note that sample_count starts at 1.
                if sample_count > min_num_samples:
                    now = labdate.MillisecondsTimestamp()
                    logging.info(
                        'Produced %s samples at a rate of %s ms / sample.',
                        humanize.intcomma(len(samples)),
                        humanize.intcomma(
                            int((now - sample_start_time) /
                                max(len(samples), 1))))
                    break

        return samples
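A small check of the batch-rounding behaviour described in the docstring: since sampling stops at the end of the first batch that meets the minimum, the number of samples returned is min_num_samples rounded up to a whole number of batches (assuming every batch yields exactly batch_size samples):

import math

def expected_sample_count(min_num_samples: int, batch_size: int) -> int:
    """min_num_samples rounded up to the next whole batch."""
    return math.ceil(min_num_samples / batch_size) * batch_size

assert expected_sample_count(7, 10) == 10   # the docstring's example
assert expected_sample_count(20, 10) == 20  # exact multiples are not exceeded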
Example #22
def clsmith_result(dummy_result: deepsmith_pb2.Result) -> pathlib.Path:
    """A test fixture which returns a dummy CLSmith result."""
    dummy_result.testcase.harness.name = 'cl_launcher'
    with tempfile.TemporaryDirectory(prefix='phd_') as d:
        pbutil.ToFile(dummy_result, pathlib.Path(d) / 'result.pbtxt')
        yield pathlib.Path(d) / 'result.pbtxt'
Example #23
def _ExportOpenCLResults(cursor, program_id, proto_dir):
    cursor.execute(
        """
SELECT
  results.id,
  programs.id,
  testcases.id,
  platforms.platform,
  platforms.device,
  platforms.driver,
  platforms.opencl,
  platforms.devtype,
  platforms.host,
  testbeds.optimizations,
  programs.generator,
  programs.date,
  programs.generation_time,
  programs.src,
  testcases.harness,
  testcases.timeout,
  results.date,
  results.returncode,
  results.runtime,
  stdouts.stdout,
  stderrs.stderr,
  stderrs.truncated,
  threads.gsize_x,
  threads.gsize_y,
  threads.gsize_z,
  threads.lsize_x,
  threads.lsize_y,
  threads.lsize_z,
  clsmith_testcase_metas.oclverified,
  dsmith_testcase_metas.gpuverified,
  dsmith_testcase_metas.oclverified,
  dsmith_program_metas.contains_floats,
  dsmith_program_metas.vector_inputs,
  dsmith_program_metas.compiler_warnings
FROM results
LEFT JOIN testbeds ON results.testbed_id = testbeds.id
LEFT JOIN platforms ON testbeds.platform_id = platforms.id
LEFT JOIN testcases ON results.testcase_id = testcases.id
LEFT JOIN programs ON testcases.program_id = programs.id
LEFT JOIN threads ON testcases.threads_id = threads.id
LEFT JOIN stdouts ON results.stdout_id = stdouts.id
LEFT JOIN stderrs ON results.stderr_id = stderrs.id
LEFT JOIN clsmith_testcase_metas ON testcases.id=clsmith_testcase_metas.id
LEFT JOIN dsmith_testcase_metas ON testcases.id=dsmith_testcase_metas.id
LEFT JOIN dsmith_program_metas ON programs.id=dsmith_program_metas.id
WHERE programs.id = %s AND platforms.platform <> 'clang'
""", (program_id, ))

    i = 0
    for row in cursor:
        i += 1
        (result_id, programs_id, testcase_id, platform_name, device_name,
         driver_version, opencl_version, devtype, host_os, cl_opt,
         generator_id, program_date, program_generation_time, program_src,
         harness_id, harness_timeout, result_date, returncode, runtime, stdout,
         stderr, truncated_stderr, gsize_x, gsize_y, gsize_z, lsize_x, lsize_y,
         lsize_z, clsmith_oclverified, dsmith_gpuverified, dsmith_oclverified,
         dsmith_program_contains_floats, dsmith_program_vector_inputs,
         dsmith_program_compiler_warnings) = row
        inputs = {
            'src': program_src,
        }
        if harness_id != -1:
            inputs['gsize'] = f'{gsize_x},{gsize_y},{gsize_z}'
            inputs['lsize'] = f'{lsize_x},{lsize_y},{lsize_z}'
        testbed_name = OPENCL_DEVICE_MAP[device_name]
        testbed_opts = {}
        _SetIf(testbed_opts, 'opencl_device', device_name.strip())
        _SetIf(testbed_opts, 'opencl_version', opencl_version.strip())
        _SetIf(testbed_opts, 'host', HOSTS_MAP.get(host_os, host_os))
        if testbed_name == "clang":
            _SetIf(testbed_opts, 'llvm_version', driver_version.strip())
        else:
            _SetIf(testbed_opts, 'driver_version', driver_version.strip())
            _SetIf(testbed_opts, 'opencl_devtype',
                   OPENCL_DEVTYPE_MAP.get(devtype, devtype))
            _SetIf(testbed_opts, 'opencl_platform', platform_name.strip())
            _SetIf(testbed_opts, 'opencl_opt',
                   'enabled' if cl_opt else 'disabled')
        invariant_opts = {}
        if clsmith_oclverified == 0:
            invariant_opts['oclverify'] = 'fail'
        elif clsmith_oclverified == 1:
            invariant_opts['oclverify'] = 'pass'
        elif dsmith_oclverified == 0:
            invariant_opts['oclverify'] = 'fail'
        elif dsmith_oclverified == 1:
            invariant_opts['oclverify'] = 'pass'
        if dsmith_gpuverified == 0:
            invariant_opts['gpuverify'] = 'fail'
        elif dsmith_gpuverified == 1:
            invariant_opts['gpuverify'] = 'pass'
        if dsmith_program_contains_floats == 0:
            invariant_opts['kernel_uses_floats'] = 'false'
        elif dsmith_program_contains_floats == 1:
            invariant_opts['kernel_uses_floats'] = 'true'
        if dsmith_program_vector_inputs == 0:
            invariant_opts['kernel_has_vector_inputs'] = 'false'
        elif dsmith_program_vector_inputs == 1:
            invariant_opts['kernel_has_vector_inputs'] = 'true'
        if dsmith_program_compiler_warnings == 0:
            invariant_opts['kernel_throws_compiler_warning'] = 'false'
        elif dsmith_program_compiler_warnings == 1:
            invariant_opts['kernel_throws_compiler_warning'] = 'true'
        testbed = deepsmith_pb2.Testbed(
            toolchain='opencl',
            name=testbed_name,
            opts=testbed_opts,
        )

        testcase = deepsmith_pb2.Testcase(
            toolchain="opencl",
            generator=_GetOpenCLGenerator(generator_id),
            harness=_GetOpenCLHarness(harness_id, harness_timeout),
            inputs=inputs,
            invariant_opts=invariant_opts,
            profiling_events=[
                deepsmith_pb2.ProfilingEvent(
                    client="cc1",
                    type="generation",
                    duration_ms=int(program_generation_time * 1000),
                    event_start_epoch_ms=labdate.MillisecondsTimestamp(
                        program_date),
                ),
            ])
        result = deepsmith_pb2.Result(
            testcase=testcase,
            testbed=testbed,
            returncode=returncode,
            outputs={
                "stdout": stdout,
                "stderr": stderr,
            },
            profiling_events=[
                deepsmith_pb2.ProfilingEvent(
                    client={
                        'Ubuntu 16.04 64bit': 'cc1',
                        'CentOS Linux 7.1.1503 64bit': 'fuji',
                        'openSUSE  13.1 64bit': 'kobol',
                    }[host_os],
                    type="runtime",
                    duration_ms=int(runtime * 1000),
                    event_start_epoch_ms=labdate.MillisecondsTimestamp(
                        result_date),
                ),
            ],
        )
        # Write the testcase to file.
        outpath = proto_dir / 'testcases' / (str(testcase_id) + '.pbtxt')
        pbutil.ToFile(testcase, outpath)
        # Write the results to file.
        outpath = proto_dir / 'results' / (str(result_id) + '.pbtxt')
        pbutil.ToFile(result, outpath)
Example #24
def abc_instance_file(abc_instance_config) -> str:
  """A test fixture that returns a path to an Instance config file."""
  with tempfile.NamedTemporaryFile() as f:
    pbutil.ToFile(abc_instance_config, pathlib.Path(f.name))
    yield f.name
Example #25
def main(argv):
  """Main entry point."""
  if len(argv) > 1:
    raise app.UsageError("Unknown arguments: '{}'.".format(' '.join(argv[1:])))

  if not FLAGS.export_path:
    raise app.UsageError('--export_path must be a directory')
  export_path = pathlib.Path(FLAGS.export_path)
  if export_path.is_file():
    raise app.UsageError('--export_path must be a directory')
  export_path.mkdir(parents=True, exist_ok=True)

  if not FLAGS.dataset_root:
    raise app.UsageError('--dataset_root must be a directory')
  dataset_root = pathlib.Path(FLAGS.dataset_root)
  if dataset_root.is_file():
    raise app.UsageError('--dataset_root must be a directory')
  dataset_root.mkdir(parents=True, exist_ok=True)

  ratios = DatasetRatios(
      FLAGS.training_ratio, FLAGS.validation_ratio, FLAGS.testing_ratio)
  assert sum(ratios) <= 1

  # Load protos.
  positive_protos = LoadPositiveProtos(
      export_path, FLAGS.positive_class_outcomes, FLAGS.max_src_len,
      FLAGS.max_protos, FLAGS.assertions_only)
  positive_protos, negative_protos = LoadNegativeProtos(
      export_path, positive_protos, FLAGS.negative_class_outcomes,
      FLAGS.max_src_len, FLAGS.balance_class_lengths,
      FLAGS.balance_class_counts)

  positive_sizes = DatasetSizes(
      int(len(positive_protos) * FLAGS.training_ratio),
      int(len(positive_protos) * FLAGS.validation_ratio),
      int(len(positive_protos) * FLAGS.testing_ratio),
  )
  negative_sizes = DatasetSizes(
      int(len(negative_protos) * FLAGS.training_ratio),
      int(len(negative_protos) * FLAGS.validation_ratio),
      int(len(negative_protos) * FLAGS.testing_ratio),
  )

  # Create output directories.
  (dataset_root / 'training').mkdir(exist_ok=True, parents=True)
  (dataset_root / 'validation').mkdir(exist_ok=True, parents=True)
  (dataset_root / 'testing').mkdir(exist_ok=True, parents=True)

  logging.info('Shuffling protos with seed %d', FLAGS.seed)
  random.seed(FLAGS.seed)
  random.shuffle(positive_protos)
  random.shuffle(negative_protos)

  for i, proto in enumerate(positive_protos[:positive_sizes[0]]):
    pbutil.ToFile(
        proto, (dataset_root / 'training' / f'positive-{i:04d}.pbtxt'))
  for i, proto in enumerate(negative_protos[:negative_sizes[0]]):
    pbutil.ToFile(
        proto, (dataset_root / 'training' / f'negative-{i:04d}.pbtxt'))
  logging.info('Wrote %s training examples',
               humanize.intcomma(positive_sizes[0] + negative_sizes[0]))
  positive_protos = positive_protos[positive_sizes[0]:]
  negative_protos = negative_protos[negative_sizes[0]:]

  for i, proto in enumerate(positive_protos[:positive_sizes[1]]):
    pbutil.ToFile(
        proto, (dataset_root / 'validation' / f'positive-{i:04d}.pbtxt'))
  for i, proto in enumerate(negative_protos[:negative_sizes[1]]):
    pbutil.ToFile(
        proto, (dataset_root / 'validation' / f'negative-{i:04d}.pbtxt'))
  logging.info('Wrote %s validation examples',
               humanize.intcomma(positive_sizes[1] + negative_sizes[1]))
  positive_protos = positive_protos[positive_sizes[1]:]
  negative_protos = negative_protos[negative_sizes[1]:]

  for i, proto in enumerate(positive_protos[:positive_sizes[2]]):
    pbutil.ToFile(
        proto, (dataset_root / 'testing' / f'positive-{i:04d}.pbtxt'))
  for i, proto in enumerate(negative_protos[:negative_sizes[2]]):
    pbutil.ToFile(
        proto, (dataset_root / 'testing' / f'negative-{i:04d}.pbtxt'))
  logging.info('Wrote %s testing examples',
               humanize.intcomma(positive_sizes[2] + negative_sizes[2]))
Example #26
def dummy_lockfile_path(
    dummy_lockfile_proto: lockfile_pb2.LockFile) -> pathlib.Path:
  """Yield a path to a lockfile proto."""
  with tempfile.TemporaryDirectory() as d:
    pbutil.ToFile(dummy_lockfile_proto, pathlib.Path(d) / 'LOCK.pbtxt')
    yield pathlib.Path(d) / 'LOCK.pbtxt'
Example #27
  def _WriteMetafile(self) -> None:
      pbutil.ToFile(self.meta,
                    pathlib.Path(self.cache.keypath('META.pbtxt')))
Example #28
def main(argv):
    """Main entry point."""
    if len(argv) > 1:
        raise app.UsageError("Unknown arguments: '{}'.".format(' '.join(
            argv[1:])))

    model_dir = pathlib.Path(FLAGS.reachability_model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    (model_dir / 'logs').mkdir(exist_ok=True)
    (model_dir / 'checkpoints').mkdir(exist_ok=True)

    logging.info('Generating graphs dataset ...')
    data = MakeReachabilityDataset(FLAGS.reachability_num_training_graphs +
                                   FLAGS.reachability_num_testing_graphs)
    training_data = reachability_pb2.ReachabilityDataset()
    training_data.entry.extend(
        data.entry[:FLAGS.reachability_num_training_graphs])
    pbutil.ToFile(training_data, model_dir / 'training_data.pbtxt')
    testing_data = reachability_pb2.ReachabilityDataset()
    testing_data.entry.extend(
        data.entry[FLAGS.reachability_num_training_graphs:])
    pbutil.ToFile(testing_data, model_dir / 'testing_data.pbtxt')

    logging.info('Number of training examples: %s.',
                 humanize.intcomma(len(training_data.entry)))
    logging.info('Number of testing examples: %s.',
                 humanize.intcomma(len(testing_data.entry)))

    n = FLAGS.reachability_num_nodes
    sequence_length = GetSequenceLength(FLAGS.reachability_num_nodes)
    logging.info('Using sequence length %s.',
                 humanize.intcomma(sequence_length))
    seqs = [ControlFlowGraphToSequence(entry.graph) for entry in data.entry]
    text = '\n'.join(seqs)
    logging.info('Deriving atomizer from %s chars.',
                 humanize.intcomma(len(text)))
    atomizer = atomizers.AsciiCharacterAtomizer.FromText(text)
    logging.info('Vocabulary size: %s.',
                 humanize.intcomma(len(atomizer.vocab)))
    with open(model_dir / 'atomizer.pkl', 'wb') as f:
        pickle.dump(atomizer, f)
    logging.info('Pickled atomizer to %s.', model_dir / 'atomizer.pkl')

    x, y = ProtosToModelData(training_data, sequence_length, atomizer)
    logging.info('Training data: x %s, y[%s] %s', x.shape, len(y), y[0].shape)

    test_x, test_y = ProtosToModelData(testing_data, sequence_length, atomizer)
    logging.info('Testing data: x %s, y[%s] %s', test_x.shape, len(test_y),
                 test_y[0].shape)

    num_uniq_seqs = len(set(seqs))
    logging.info('Unique sequences: %s of %s (%.2f %%)',
                 humanize.intcomma(num_uniq_seqs),
                 humanize.intcomma(len(seqs)),
                 (num_uniq_seqs / len(seqs)) * 100)
    num_uniq_labels = len(
        set([''.join(str(x) for x in e.reachable) for e in data.entry]))
    logging.info('Unique labels: %s of %s (%.2f %%)',
                 humanize.intcomma(num_uniq_labels),
                 humanize.intcomma(len(seqs)),
                 (num_uniq_labels / len(seqs)) * 100)

    np.random.seed(FLAGS.reachability_model_seed)
    random.seed(FLAGS.reachability_model_seed)
    logging.info('Building Keras model ...')
    model = BuildKerasModel(sequence_length=sequence_length,
                            num_classes=n,
                            lstm_size=FLAGS.lstm_size,
                            num_layers=FLAGS.num_layers,
                            dnn_size=FLAGS.dnn_size,
                            atomizer=atomizer)

    model_json = model.to_json()
    with open(model_dir / 'model.json', 'w') as f:
        f.write(model_json)
    logging.info('Wrote model to %s', model_dir / 'model.json')

    logging.info('Training model ...')

    def OnEpochEnd(epoch, logs):
        """End-of-epoch model evaluate."""
        del logs
        logging.info('Evaluating model at epoch %d', epoch)
        # model.evaluate() returns [overall_loss, *per_output_losses,
        # *per_output_accuracies].
        row = model.evaluate(test_x,
                             test_y,
                             batch_size=FLAGS.batch_size,
                             verbose=0)
        overall_loss, losses, accuracies = row[0], row[1:1 + n], row[n + 1:]
        logging.info('Accuracy (excluding first class): %.2f %%',
                     (sum(accuracies[1:]) / len(accuracies[1:])) * 100)

    logger = telemetry.TrainingLogger(logdir=model_dir / 'logs')
    model.fit(
        x,
        y,
        epochs=FLAGS.num_epochs,
        batch_size=FLAGS.batch_size,
        verbose=True,
        shuffle=True,
        callbacks=[
            keras.callbacks.ModelCheckpoint(str(model_dir / 'checkpoints') +
                                            '/weights_{epoch:03d}.hdf5',
                                            verbose=1,
                                            mode="min",
                                            save_best_only=False),
            keras.callbacks.LambdaCallback(on_epoch_end=OnEpochEnd),
            logger.KerasCallback(keras),
        ])

    for i in range(5):
        outs = FlattenModelOutputs(model.predict(np.array([x[i]])))
        logging.info('outs:    %s', outs)
        logging.info('clamped: %s', np.rint(outs).astype(np.int32))
        logging.info('true:    %s', FlattenModelData(y, i))
        logging.info('')
    logging.info('done')