Esempio n. 1
0
def test_Preprocess_mock_preprocessor_bad_code():
  """Check that Preprocess() raises BadCodeException for the bad-code mock."""
  bad_code_mock = (
      'deeplearning.clgen.preprocessors.preprocessors_test'
      ':MockPreprocessorBadCode')
  with pytest.raises(errors.BadCodeException):
    preprocessors.Preprocess('', [bad_code_mock])
Esempio n. 2
0
 def FromContentFile(
         cls, contentfile_root: pathlib.Path, relpath: pathlib.Path,
         preprocessors_: typing.List[str]) -> 'PreprocessedContentFile':
     """Instantiate a PreprocessedContentFile.

     Reads the file at `contentfile_root / relpath`, runs it through the
     given preprocessors, and records the outcome together with size and
     timing statistics.

     Args:
       contentfile_root: Directory that `relpath` is resolved against.
       relpath: Path of the content file, relative to `contentfile_root`.
       preprocessors_: Preprocessor names passed to
         preprocessors.Preprocess().

     Returns:
       A new instance of `cls`. When the file cannot be decoded or a
       preprocessor raises BadCodeException, `preprocessing_succeeded` is
       False and `text` holds the error description instead of code.
     """
     start_time = time.time()
     # Bind input_text before the try block: if reading the file raises
     # UnicodeDecodeError, the handler below swallows the error and the
     # input statistics must still be computable.
     input_text = ''
     preprocessing_succeeded = False
     try:
         with open(contentfile_root / relpath) as f:
             input_text = f.read()
         text = preprocessors.Preprocess(input_text, preprocessors_)
         preprocessing_succeeded = True
     except UnicodeDecodeError:
         text = 'Unicode error'
     except errors.BadCodeException as e:
         text = str(e)
     end_time = time.time()
     preprocess_time_ms = int((end_time - start_time) * 1000)
     input_text_stripped = input_text.strip()
     return cls(
         input_relpath=relpath,
         input_sha256=GetFileSha256(contentfile_root / relpath),
         input_charcount=len(input_text_stripped),
         input_linecount=len(input_text_stripped.split('\n')),
         sha256=hashlib.sha256(text.encode('utf-8')).digest(),
         charcount=len(text),
         linecount=len(text.split('\n')),
         text=text,
         preprocessing_succeeded=preprocessing_succeeded,
         preprocess_time_ms=preprocess_time_ms,
         wall_time_ms=preprocess_time_ms,  # The outer-loop may change this.
         date_added=datetime.datetime.utcnow(),
     )
Esempio n. 3
0
def test_Preprocess_mock_preprocessor_internal_error():
  """Check that Preprocess() raises InternalError for the internal-error mock."""
  internal_error_mock = (
      'deeplearning.clgen.preprocessors.preprocessors_test'
      ':MockPreprocessorInternalError')
  with pytest.raises(errors.InternalError):
    preprocessors.Preprocess('', [internal_error_mock])
Esempio n. 4
0
def test_Preprocess_RejectSecrets():
  """Check that RejectSecrets raises BadCodeException for a private key header."""
  private_key_header = "-----BEGIN RSA PRIVATE KEY-----"
  reject_secrets = (
    "deeplearning.clgen.preprocessors.preprocessors" ":RejectSecrets"
  )
  with test.Raises(errors.BadCodeException):
    preprocessors.Preprocess(private_key_header, [reject_secrets])
Esempio n. 5
0
  def FromBQFile(
    cls,
    file: bqdb.bqMainFile,
    preprocessors_: typing.List[str],
  ) -> typing.List["PreprocessedContentFile"]:
    """Instantiate PreprocessedContentFiles from a BigQuery main file.

    Args:
      file: The source record; its `content` is preprocessed and its `id`
        is used for both the relative path and the input checksum field.
      preprocessors_: Preprocessor names passed to preprocessors.Preprocess().

    Returns:
      A list with one instance of `cls` per (text, success) pair produced by
      preprocessors.Preprocess().

    Raises:
      RuntimeError: If reading the content or starting the preprocessing
        fails. The original exception is attached as the cause.
    """
    start_time = time.time()
    try:
      input_text = file.content
      text_generator = preprocessors.Preprocess(input_text, preprocessors_)
    except Exception as e:
      # Raise a real exception type: raising a plain string is itself a
      # TypeError and would mask the underlying failure.
      raise RuntimeError("Unexpected exception: {}".format(e)) from e

    end_time = time.time()
    preprocess_time_ms = int((end_time - start_time) * 1000)
    input_text_stripped = input_text.strip()
    return [ cls(
      input_relpath           = "main_files/{}".format(file.id),
      input_sha256            = file.id,
      input_charcount         = len(input_text_stripped),
      input_linecount         = len(input_text_stripped.split("\n")),
      sha256                  = hashlib.sha256(text.encode("utf-8")).hexdigest(),
      charcount               = len(text),
      linecount               = len(text.split("\n")),
      text                    = text,
      preprocessing_succeeded = success,
      preprocess_time_ms      = preprocess_time_ms,
      wall_time_ms            = preprocess_time_ms,  # The outer-loop may change this.
      date_added              = datetime.datetime.utcnow(),
    ) for (text, success) in text_generator ]
Esempio n. 6
0
def test_Preprocess_RejectSecrets():
    """Check that RejectSecrets returns text without secrets unchanged."""
    reject_secrets = ("deeplearning.clgen.preprocessors.preprocessors"
                      ":RejectSecrets")
    result = preprocessors.Preprocess("Hello, world!", [reject_secrets])
    assert result == "Hello, world!"
Esempio n. 7
0
def test_Preprocess_RejectSecrets():
  """Check that RejectSecrets raises BadCodeException for a private key header."""
  private_key_header = '-----BEGIN RSA PRIVATE KEY-----'
  reject_secrets = ('deeplearning.clgen.preprocessors.preprocessors'
                    ':RejectSecrets')
  with pytest.raises(errors.BadCodeException):
    preprocessors.Preprocess(private_key_header, [reject_secrets])
Esempio n. 8
0
def test_Preprocess_mock_preprocessor():
    """Check that Preprocess() returns the mock preprocessor's output."""
    mock_preprocessor = (
        "deeplearning.clgen.preprocessors.preprocessors_test:MockPreprocessor"
    )
    result = preprocessors.Preprocess("hello", [mock_preprocessor])
    assert result == "PREPROCESSED"
Esempio n. 9
0
def test_Preprocess_mock_preprocessor_internal_error():
    """Check that Preprocess() raises InternalError for the internal-error mock."""
    internal_error_mock = (
        "deeplearning.clgen.preprocessors.preprocessors_test"
        ":MockPreprocessorInternalError"
    )
    with test.Raises(errors.InternalError):
        preprocessors.Preprocess("", [internal_error_mock])
Esempio n. 10
0
def test_Preprocess_mock_preprocessor_bad_code():
    """Check that Preprocess() raises BadCodeException for the bad-code mock."""
    bad_code_mock = (
        "deeplearning.clgen.preprocessors.preprocessors_test"
        ":MockPreprocessorBadCode"
    )
    with test.Raises(errors.BadCodeException):
        preprocessors.Preprocess("", [bad_code_mock])
Esempio n. 11
0
    def _SampleBatch(
        self,
        sampler: samplers.Sampler,
        atomizer: atomizers.AtomizerBase,
        sample_observers: typing.List[sample_observers_lib.SampleObserver],
    ) -> bool:
        """Produce one sample with backtracking and pass it to the observers.

        Args:
          sampler: The sampler to draw from. Its `encoded_start_text` is
            copied before sampling and reassigned afterwards.
          atomizer: The atomizer handed to the backtracking helper and to
            SampleOneWithBacktracking().
          sample_observers: Observers whose OnSample() is called with the
            resulting sample.

        Returns:
          True if every observer's OnSample() returned a falsy value,
          False otherwise.
        """
        batch_start_ms = labdate.MillisecondsTimestamp()

        # The sampler.encoded_start_text attribute is used as a way to re-seed
        # the model state during rollback, so keep a copy of the original value
        # in order to put it back once the sample batch is done.
        saved_start_text = sampler.encoded_start_text.copy()

        self.backend.InitSampleBatch(sampler)

        helper = OpenClBacktrackingHelper(atomizer, self._target_features)
        self._logger.OnSampleStart(helper)
        tokens = self.SampleOneWithBacktracking(sampler, atomizer, helper)
        self._logger.OnSampleEnd(helper)

        batch_end_ms = labdate.MillisecondsTimestamp()
        elapsed_ms = batch_end_ms - batch_start_ms

        # Format the sampled text; an empty sample skips the preprocessors.
        if not len(tokens):
            text = ""
        else:
            formatting_pipeline = [
                "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
                "deeplearning.clgen.preprocessors.opencl:SanitizeKernelPrototype",
                "deeplearning.clgen.preprocessors.common:StripTrailingWhitespace",
                "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
                "deeplearning.clgen.preprocessors.common:StripDuplicateEmptyLines",
                "deeplearning.clgen.preprocessors.cxx:ClangFormat",
            ]
            text = preprocessors.Preprocess("".join(tokens), formatting_pipeline)

        # Put the sampler's start text back.
        sampler.encoded_start_text = saved_start_text

        sample = model_pb2.Sample(
            text=text,
            sample_start_epoch_ms_utc=batch_start_ms,
            sample_time_ms=elapsed_ms,
            wall_time_ms=elapsed_ms,
            num_tokens=len(tokens),
        )

        # Build the full list first so that OnSample() runs on every observer
        # before the results are combined.
        observer_results = [obs.OnSample(sample) for obs in sample_observers]
        return not any(observer_results)
Esempio n. 12
0
def _PrettifySource(src: str) -> str:
    """Run an OpenCL source through a fixed list of preprocessors.

    Args:
      src: The source text to format.

    Returns:
      The result of preprocessors.Preprocess() on `src`.
    """
    formatting_pipeline = [
        "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
        "deeplearning.clgen.preprocessors.opencl:SanitizeKernelPrototype",
        "deeplearning.clgen.preprocessors.common:StripTrailingWhitespace",
        "deeplearning.clgen.preprocessors.opencl:NormalizeIdentifiers",
        "deeplearning.clgen.preprocessors.common:StripDuplicateEmptyLines",
        "deeplearning.clgen.preprocessors.cxx:ClangFormat",
    ]
    prettified = preprocessors.Preprocess(src, formatting_pipeline)
    return prettified
Esempio n. 13
0
  def FromContentFile(
    cls,
    contentfile_root: pathlib.Path,
    relpath: pathlib.Path,
    preprocessors_: typing.List[str],
  ) -> typing.List["PreprocessedContentFile"]:
    """Instantiate PreprocessedContentFiles from a file on disk.

    Args:
      contentfile_root: Directory that `relpath` is resolved against.
      relpath: Path of the content file, relative to `contentfile_root`.
      preprocessors_: Preprocessor names passed to preprocessors.Preprocess().

    Returns:
      A list with one instance of `cls` per (text, success) pair produced by
      preprocessors.Preprocess().

    Raises:
      RuntimeError: If opening the file or starting the preprocessing fails.
        The original exception is attached as the cause.
    """
    start_time = time.time()
    input_text = ""
    try:
      with open(contentfile_root / relpath) as f:
        try:
          input_text = f.read()
        except Exception:
          # Best effort: any failure while reading (including decode
          # errors) substitutes a placeholder instead of aborting.
          input_text = "/*corrupted file format*/"
      text_generator = preprocessors.Preprocess(input_text, preprocessors_)
    except Exception as e:
      # Raise a real exception type: raising a plain string is itself a
      # TypeError and would mask the underlying failure.
      raise RuntimeError("Unexpected exception: {}".format(e)) from e

    end_time = time.time()
    preprocess_time_ms = int((end_time - start_time) * 1000)
    input_text_stripped = input_text.strip()
    return [ cls(
      input_relpath           = relpath,
      input_sha256            = GetFileSha256(contentfile_root / (relpath)),
      input_charcount         = len(input_text_stripped),
      input_linecount         = len(input_text_stripped.split("\n")),
      sha256                  = hashlib.sha256(text.encode("utf-8")).hexdigest(),
      charcount               = len(text),
      linecount               = len(text.split("\n")),
      text                    = text,
      preprocessing_succeeded = success,
      preprocess_time_ms      = preprocess_time_ms,
      wall_time_ms            = preprocess_time_ms,  # The outer-loop may change this.
      date_added              = datetime.datetime.utcnow(),
    ) for (text, success) in text_generator ]
Esempio n. 14
0
 def FromContentFile(
   cls,
   contentfile_root: pathlib.Path,
   relpath: pathlib.Path,
   preprocessors_: typing.List[str],
 ) -> "PreprocessedContentFile":
   """Build a PreprocessedContentFile from a file on disk.

   Args:
     contentfile_root: Directory that `relpath` is resolved against.
     relpath: Path of the content file, relative to `contentfile_root`.
     preprocessors_: Preprocessor names passed to preprocessors.Preprocess().

   Returns:
     A new instance of `cls`. If a UnicodeDecodeError or any other
     ValueError is raised while reading or preprocessing,
     `preprocessing_succeeded` is False and `text` holds an error
     description instead.
   """
   began = time.time()
   input_text = ""
   try:
     with open(contentfile_root / relpath) as infile:
       input_text = infile.read()
     text = preprocessors.Preprocess(input_text, preprocessors_)
   except UnicodeDecodeError:
     # Must precede the ValueError handler: UnicodeDecodeError is itself a
     # ValueError but is reported with a fixed message.
     text = "Unicode error"
     preprocessing_succeeded = False
   except ValueError as error:
     # BadCodeException subclasses ValueError. Handling the broader type lets
     # custom preprocessors raise ValueError without depending on CLgen
     # sources.
     text = str(error)
     preprocessing_succeeded = False
   else:
     preprocessing_succeeded = True
   finished = time.time()
   elapsed_ms = int((finished - began) * 1000)
   stripped_input = input_text.strip()
   return cls(
     input_relpath=relpath,
     input_sha256=GetFileSha256(contentfile_root / relpath),
     input_charcount=len(stripped_input),
     input_linecount=len(stripped_input.split("\n")),
     sha256=hashlib.sha256(text.encode("utf-8")).hexdigest(),
     charcount=len(text),
     linecount=len(text.split("\n")),
     text=text,
     preprocessing_succeeded=preprocessing_succeeded,
     preprocess_time_ms=elapsed_ms,
     wall_time_ms=elapsed_ms,  # The outer-loop may change this.
     date_added=datetime.datetime.utcnow(),
   )
def _PreprocessBenchmarkInnerLoopBadCode(preprocessors_: typing.List[str],
                                         code_in):
    """Benchmark body: preprocessing `code_in` must raise BadCodeException.

    Args:
      preprocessors_: Preprocessor names passed to preprocessors.Preprocess().
      code_in: The input passed to preprocessors.Preprocess().
    """
    expect_bad_code = test.Raises(errors.BadCodeException)
    with expect_bad_code:
        preprocessors.Preprocess(code_in, preprocessors_)
def _PreprocessBenchmarkInnerLoop(preprocessors_: typing.List[str],
                                  code_in: str, code_out: str):
    """Benchmark body: preprocessing `code_in` must produce `code_out`.

    Args:
      preprocessors_: Preprocessor names passed to preprocessors.Preprocess().
      code_in: The input text.
      code_out: The output the preprocessors are expected to produce.
    """
    actual = preprocessors.Preprocess(code_in, preprocessors_)
    assert actual == code_out
Esempio n. 17
0
def test_Preprocess_RejectSecrets():
  """Check that RejectSecrets returns text without secrets unchanged."""
  reject_secrets = ('deeplearning.clgen.preprocessors.preprocessors'
                    ':RejectSecrets')
  result = preprocessors.Preprocess('Hello, world!', [reject_secrets])
  assert result == 'Hello, world!'
Esempio n. 18
0
def test_Preprocess_mock_preprocessor():
  """Check that Preprocess() returns the mock preprocessor's output."""
  mock_preprocessor = (
    'deeplearning.clgen.preprocessors.preprocessors_test:MockPreprocessor')
  result = preprocessors.Preprocess('hello', [mock_preprocessor])
  assert result == 'PREPROCESSED'
Esempio n. 19
0
def test_Preprocess_no_preprocessors():
  """Check that an empty preprocessor list leaves the input unchanged."""
  no_preprocessors = []
  result = preprocessors.Preprocess('hello', no_preprocessors)
  assert result == 'hello'