예제 #1
0
    def Create(self) -> None:
        """Build the pre-processed corpus, then the encoded corpus.

        Each stage runs while holding a 'LOCK' file located in the directory
        that contains that stage's database.

        Raises:
          EmptyCorpusException: If the pre-processed corpus reports a size of
            zero once pre-processing has finished.
        """
        self._created = True
        logging.info('Content ID: %s', self.content_id)

        # Stage 1: pre-process the content files under an exclusive lock.
        preprocess_lock = lockfile.LockFile(
            self.preprocessed.database_path.parent / 'LOCK')
        with preprocess_lock.acquire(replace_stale=True, block=True):
            self.preprocessed.Create(self.config)

        # Nothing to encode if pre-processing produced no files.
        if not self.preprocessed.size:
            raise errors.EmptyCorpusException(
                "Pre-processed corpus contains no files: "
                f"'{self.preprocessed.database_path}'")

        # Stage 2: fetch the atomizer and encode, again under a lock.
        encode_lock = lockfile.LockFile(
            self.encoded.database_path.parent / 'LOCK')
        with encode_lock.acquire(replace_stale=True, block=True):
            started = time.time()
            atomizer = self.atomizer
            vocab_size_text = humanize.intcomma(atomizer.vocab_size)
            # Time spent obtaining the atomizer and its vocabulary size.
            elapsed_ms = int((time.time() - started) * 1000)
            logging.info('%s: %s tokens in %s ms',
                         type(atomizer).__name__,
                         vocab_size_text,
                         humanize.intcomma(elapsed_ms))
            # Dump the whole vocabulary to the log, one entry per line.
            for vocab_key, vocab_value in atomizer.vocab.items():
                logging.info('atomizer.vocab  %s : %s', vocab_key, vocab_value)
            self.encoded.Create(self.preprocessed, atomizer,
                                self.config.contentfile_separator)
예제 #2
0
 def is_locked(self) -> bool:
     """Return True if the pre-processed or the encoded corpus lock is held.

     The encoded corpus is only inspected when the pre-processed corpus is
     not locked.
     """

     def _lock_held(database) -> bool:
         # Each database is guarded by a 'LOCK' file in its parent directory.
         lock_path = database.database_path.parent / 'LOCK'
         return bool(lockfile.LockFile(lock_path).islocked)

     return _lock_held(self.preprocessed) or _lock_held(self.encoded)
예제 #3
0
def test_LockFile_release_fail(dummy_lockfile_path):
  """Test that releasing a lock owned by a different host fails."""
  lock = lockfile.LockFile(dummy_lockfile_path)
  assert lock.islocked
  with pytest.raises(lockfile.UnableToReleaseLockError) as e_ctx:
    lock.release()
  assert str(e_ctx.value) == f"""\
예제 #4
0
def test_LockFile_release_deletes_file():
  """Test that release() removes the lock file from disk."""
  with tempfile.TemporaryDirectory() as tmpdir:
    lock = lockfile.LockFile(pathlib.Path(tmpdir) / 'LOCK')
    lock.acquire()
    lock.release()
    # The file backing the lock must be gone once the lock is released.
    assert not lock.path.is_file()
예제 #5
0
def test_LockFile_owned_by_self():
  """Test that owned_by_self is False before acquire() and True after."""
  with tempfile.TemporaryDirectory() as tmpdir:
    lock = lockfile.LockFile(pathlib.Path(tmpdir) / 'LOCK')
    # A lock that was never acquired is not owned by this process.
    assert not lock.owned_by_self
    lock.acquire()
    assert lock.owned_by_self
예제 #6
0
def test_LockFile_islocked():
  """Test that islocked is False before acquire() and True after."""
  with tempfile.TemporaryDirectory() as tmpdir:
    lock = lockfile.LockFile(pathlib.Path(tmpdir) / 'LOCK')
    # A lock that was never acquired must report as unlocked.
    assert not lock.islocked
    lock.acquire()
    assert lock.islocked
예제 #7
0
def test_LockFile_file_exists():
  """Test that acquire() creates the lock file on disk."""
  with tempfile.TemporaryDirectory() as tmpdir:
    lock = lockfile.LockFile(pathlib.Path(tmpdir) / 'LOCK')
    # Constructing the LockFile alone must not create the file.
    assert not lock.path.is_file()
    lock.acquire()
    assert lock.path.is_file()
예제 #8
0
def IsEligible(instance: clgen.Instance) -> bool:
    """Return whether an instance is eligible for training or sampling.

    An instance is not eligible when its corpus, its model's training lock,
    or the 'LOCK' file in its sampler cache directory is held. The checks
    run in that order and stop at the first held lock, whose name is logged.

    Args:
      instance: The CLgen instance to inspect.

    Returns:
      True if none of the three locks is held, else False.
    """
    blocked_reason = None
    if instance.model.corpus.is_locked:
        blocked_reason = 'Corpus is locked'
    elif instance.model.training_lock.islocked:
        blocked_reason = 'Model is locked'
    else:
        # The sampler cache is only resolved once the other locks are free.
        sampler_cache = instance.model.SamplerCache(instance.sampler)
        if lockfile.LockFile(sampler_cache / 'LOCK').islocked:
            blocked_reason = 'Sampler is locked'

    if blocked_reason is None:
        return True
    logging.info(blocked_reason)
    return False
예제 #9
0
def test_LockFile_force_replace_stale():
  """Test that acquire(force=True) replaces a lock held under another PID."""
  with tempfile.TemporaryDirectory() as tmpdir:
    lock = lockfile.LockFile(pathlib.Path(tmpdir) / 'LOCK')

    # 4194303 is the maximum PID on Linux (the limit is OS-dependent). One
    # past it is presumably a PID that cannot belong to this process.
    foreign_pid = 4194303 + 1
    lock.acquire(pid=foreign_pid)
    assert lock.islocked
    assert not lock.owned_by_self

    # A plain acquire() must refuse to take a lock owned by another PID.
    with pytest.raises(lockfile.UnableToAcquireLockError):
      lock.acquire()

    # Forcing the acquire hands ownership to this process.
    lock.acquire(force=True)
    assert lock.islocked
    assert lock.owned_by_self

    lock.release()
    assert not fs.exists(lock.path)
예제 #10
0
def SampleModel(instance: clgen.Instance) -> None:
    """Take --output_corpus_size samples from model.

    Samples are written into the sampler cache directory as
    '<sha256 of the sample text>.pbtxt'. Sampling repeats until the directory
    holds at least FLAGS.output_corpus_size entries, not counting the 'LOCK'
    file that guards the directory.

    Args:
      instance: The CLgen instance whose model and sampler are used.
    """
    logging.info('Training and sampling the CLgen model ...')
    target_samples = FLAGS.output_corpus_size
    sample_dir = instance.model.SamplerCache(instance.sampler)
    sample_dir.mkdir(exist_ok=True)

    def _CountSamples() -> int:
        # The lock file lives inside sample_dir and exists for as long as the
        # lock is held. Counting it as a sample would end sampling one
        # sample short of the target.
        return sum(1 for entry in sample_dir.iterdir()
                   if entry.name != 'LOCK')

    num_samples = _CountSamples()
    logging.info('Need to generate %d samples in %s',
                 max(target_samples - num_samples, 0), sample_dir)
    if num_samples < target_samples:
        sample_lock = lockfile.LockFile(sample_dir / 'LOCK')
        with sample_lock.acquire(replace_stale=True, block=True):
            # Re-count after acquiring the lock: the directory may have
            # changed while this call was blocked waiting for it.
            num_samples = _CountSamples()
            while num_samples < target_samples:
                samples = instance.model.SampleFast(
                    instance.sampler, target_samples - num_samples)
                for sample in samples:
                    sample_id = crypto.sha256_str(sample.text)
                    pbutil.ToFile(sample, sample_dir / f'{sample_id}.pbtxt')
                # Count from disk rather than adding len(samples): files are
                # named by a hash of the sample text, so samples with the
                # same text land in the same file.
                num_samples = _CountSamples()
예제 #11
0
 def training_lock(self) -> lockfile.LockFile:
     """Return a LockFile at the cache's 'LOCK' key, for exclusive training."""
     lock_path = self.cache.keypath('LOCK')
     return lockfile.LockFile(lock_path)