Example #1
def test_train_full_pipeline(
    tmpdir: LocalPath,
    dataset_path: str,
    categorical_features: List[str],
    numerical_features: List[str],
    target_col: str,
):
    expected_output_model_path = tmpdir.join("model.pkl")
    expected_metric_path = tmpdir.join("metrics.json")
    expected_report_path = tmpdir.join()
    params = TrainingPipelineParams(
        input_data_path=dataset_path,
        output_model_path=expected_output_model_path,
        metric_path=expected_metric_path,
        report_path=expected_report_path,
        splitting_params=SplittingParams(val_size=0.2, random_state=42),
        feature_params=FeatureParams(
            numerical_features=numerical_features,
            categorical_features=categorical_features,
            target_col=target_col,
        ),
        train_params=TrainingParams(model_type="KNeighborsClassifier"),
    )
    real_model_path, metrics = train_pipeline(params)
    assert metrics["auc"] > 0.5
Example #2
def test_train_e2e(tmpdir: LocalPath, fake_dataset: str,
                   categorical_features: List[str],
                   numerical_features: List[str], target_col: str,
                   features_to_drop: List[str], config_test):
    categorical_features = list(
        set(categorical_features) - set(numerical_features))
    features_to_drop = list(set(features_to_drop) - set(numerical_features))
    expected_output_model_path = tmpdir.join("model.pkl")
    expected_metric_path = tmpdir.join("metrics.json")
    expected_transformer_path = tmpdir.join("transformer.pkl")
    for model_type in config_test.model_types:
        params = TrainingPipelineParams(
            input_data_path=fake_dataset,
            input_data_url="",
            output_model_path=expected_output_model_path,
            metric_path=expected_metric_path,
            transformer_path=expected_transformer_path,
            splitting_params=SplittingParams(
                val_size=config_test.splitting_val_size,
                random_state=config_test.splitting_random_state,
            ),
            feature_params=FeatureParams(
                numerical_features=numerical_features,
                categorical_features=categorical_features,
                target_col=target_col,
                features_to_drop=features_to_drop,
            ),
            train_params=TrainingParams(model_type=model_type),
        )
        real_model_path, metrics = train_pipeline(params)
        assert metrics["accuracy"] >= config_test.min_accuracy
        assert os.path.exists(real_model_path)
        assert os.path.exists(params.metric_path)
Example #3
    def test_updates_existing_entry(
        cache_file: LocalPath,
        hash_file: LocalPath,
        entry_position: CacheEntryPosition,
        final_newline: bool,
    ):
        cache_file.write(rand_string())

        populate_hash_file(
            hash_file,
            cache_file=cache_file,
            cache_position=entry_position,
            final_newline=final_newline,
        )

        cache_file.write(rand_string())
        expected_entry = syphon.hash.HashEntry(cache_file)

        with syphon.hash.HashFile(hash_file) as hashfile:
            hashfile.update(expected_entry)

        actual_entry: Optional[syphon.hash.HashEntry] = None
        with syphon.hash.HashFile(hash_file) as hashfile:
            for actual_entry in hashfile:
                if os.path.samefile(expected_entry.filepath,
                                    actual_entry.filepath):
                    break

        assert actual_entry is not None
        assert expected_entry.binary == actual_entry.binary
        assert os.path.samefile(expected_entry.filepath, actual_entry.filepath)
        assert expected_entry.hash == actual_entry.hash
        assert str(expected_entry) == str(actual_entry)
Example #4
    def test_allow_import_of_files_found_in_modules_during_parallel_check(
            self, tmpdir: LocalPath) -> None:
        test_directory = tmpdir / "test_directory"
        test_directory.mkdir()
        spam_module = test_directory / "spam.py"
        spam_module.write("'Empty'")

        init_module = test_directory / "__init__.py"
        init_module.write("'Empty'")

        # For multiple jobs we could not find the `spam.py` file.
        with tmpdir.as_cwd():
            self._runtest(
                [
                    "-j2",
                    "--disable=missing-docstring, missing-final-newline",
                    "test_directory",
                ],
                code=0,
            )

        # A single job should be fine as well
        with tmpdir.as_cwd():
            self._runtest(
                [
                    "-j1",
                    "--disable=missing-docstring, missing-final-newline",
                    "test_directory",
                ],
                code=0,
            )
Example #5
def params(tmpdir: LocalPath, numerical_features_yes: List[str],
           target_col: str):

    expected_train_data_path = tmpdir.join("train.csv")
    expected_model_path = tmpdir.join("models.pkl")
    expected_metric_path = tmpdir.join("metrics.json")
    expected_transformer_path = tmpdir.join("transformer.pkl")
    expected_source_data_path = tmpdir.join("source.csv")
    expected_result_data_path = tmpdir.join("result.csv")

    params = Params(
        report_path="",
        train_data_path=expected_train_data_path,
        model_path=expected_model_path,
        features_transformer_path=expected_transformer_path,
        metric_path=expected_metric_path,
        splitting_params=SplittingParams(val_size=0.2, random_state=239),
        train_params=TrainingParams(model_type="RandomForestClassifier"),
        feature_params=FeatureParams(categorical_features=None,
                                     numerical_features=numerical_features_yes,
                                     features_to_drop=None,
                                     target_col=target_col),
        inference_params=InferenceParams(
            source_data_path=expected_source_data_path,
            result_data_path=expected_result_data_path))
    return params
Example #6
def params(
        tmpdir: LocalPath,
        dataset_path: str,
        categorical_features: List[str],
        numerical_features: List[str],
        target_col: str
):
    expected_output_model_path = tmpdir.join("model.pkl")
    expected_metric_path = tmpdir.join("metrics.json")
    params = PipelineParams(
        train_data_path=dataset_path,
        data_for_pred_path=tmpdir.join('fake_data.csv'),
        predictions_path=tmpdir.join('predictions.csv'),
        transformer_path=tmpdir.join('transformer.pkl'),
        model_path=expected_output_model_path,
        metric_path=expected_metric_path,
        split_params=SplitParams(),
        features_params=FeatureParams(
            numerical=numerical_features,
            categorical=categorical_features,
            target=target_col,
        ),
        train_params=TrainParams(
            model_type="LogisticRegression", C=1, n_jobs=-1, penalty='l2'
        ),
    )
    return params
Example #7
        def __init__(self, root: LocalPath):
            self.prev_dir: str

            # Make directories.
            self.root = root
            self.level1: LocalPath = LocalPath.make_numbered_dir(
                prefix="lvl1-dir", rootdir=self.root, keep=3, lock_timeout=300)
            self.archive: LocalPath = LocalPath.make_numbered_dir(
                prefix="lvl2-dir",
                rootdir=self.level1,
                keep=3,
                lock_timeout=300)
            self.level2: LocalPath = LocalPath.make_numbered_dir(
                prefix="lvl2-dir",
                rootdir=self.level1,
                keep=3,
                lock_timeout=300)

            # Resolve filepaths.
            self._cache0: LocalPath = self.root.join(
                "cache0.csv")  # Relative entry
            self._cache1: LocalPath = self.level1.join(
                "cache1.csv")  # Filename entry
            self._cache2: LocalPath = self.level2.join(
                "cache2.csv")  # Absolute entry
            # NOTE: This class' cache path factory will have to be reconfigured if the
            #       location of the hashfile changes!
            self.hashfile: LocalPath = self.level1.join("sha256sums")

            # Touch files.
            self.hashfile.write("")
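
The snippet above leans on LocalPath.make_numbered_dir, which creates prefix-named, incrementally numbered directories and prunes old ones once more than keep exist. A minimal sketch of that behavior, assuming an empty scratch root (all names here are hypothetical):

from py._path.local import LocalPath

root = LocalPath.mkdtemp()
first = LocalPath.make_numbered_dir(prefix="run-", rootdir=root, keep=3, lock_timeout=300)
second = LocalPath.make_numbered_dir(prefix="run-", rootdir=root, keep=3, lock_timeout=300)
# Numbering starts at 0 and increments per call, so under an empty root the
# first two directories are run-0 and run-1.
assert first.basename == "run-0"
assert second.basename == "run-1"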
Example #8
def populate_hash_file(
    hash_file: LocalPath,
    cache_file: Optional[LocalPath] = None,
    cache_position: CacheEntryPosition = CacheEntryPosition.RANDOM,
    final_newline: bool = True,
) -> List[syphon.hash.HashEntry]:
    # Generate hashfile content.
    expected_entries: List[syphon.hash.HashEntry] = [
        syphon.hash.HashEntry(os.path.join(get_data_path(), "empty.csv")),
        syphon.hash.HashEntry(os.path.join(get_data_path(), "iris.csv")),
        syphon.hash.HashEntry(os.path.join(get_data_path(), "iris_plus.csv")),
    ]
    expected_entries = randomize(*expected_entries)

    if cache_file is not None:
        if cache_position == CacheEntryPosition.FIRST:
            expected_entries.insert(0, syphon.hash.HashEntry(cache_file))
        elif cache_position == CacheEntryPosition.RANDOM:
            expected_entries.insert(
                random.randint(1,
                               len(expected_entries) - 1),
                syphon.hash.HashEntry(cache_file),
            )
        elif cache_position == CacheEntryPosition.LAST:
            expected_entries.append(syphon.hash.HashEntry(cache_file))

    hashfile_content = "\n".join([str(e) for e in expected_entries])
    # There's a test that checks for proper handling of files without a trailing
    # newline, so we should make our file with the opposite case.
    hash_file.write(hashfile_content + ("\n" if final_newline else ""))

    return expected_entries
Example #9
def test_train_e2e(
    tmpdir: LocalPath,
    dataset_path: str,
    categorical_features: List[str],
    numerical_features: List[str],
    target_col: str,
    features_to_drop: List[str],
):
    expected_output_model_path = tmpdir.join("model.pkl")
    expected_metric_path = tmpdir.join("metrics.json")
    expected_pretrained_model_path = expected_output_model_path
    expected_predictions_path = tmpdir.join("data/predicted/predictions.csv")
    params = TrainingPipelineParams(
        input_data_path=dataset_path,
        output_model_path=expected_output_model_path,
        metric_path=expected_metric_path,
        pretrained_model_path=expected_pretrained_model_path,
        predictions_path=expected_predictions_path,
        splitting_params=SplittingParams(val_size=0.2, random_state=1234),
        feature_params=FeatureParams(
            numerical_features=numerical_features,
            categorical_features=categorical_features,
            target_col=target_col,
            features_to_drop=features_to_drop,
            use_log_trick=False,
        ),
        train_params=TrainingParams(model_type="LogisticRegression"),
    )
    real_model_path, metrics = train_pipeline(params, LogisticRegression())
    assert metrics["roc_auc"] > 0
    assert os.path.exists(real_model_path)
    assert os.path.exists(params.metric_path)
Example #10
def test_does_nothing_when_given_zero_files(
    capsys: CaptureFixture,
    cache_file: LocalPath,
    hash_file: Optional[LocalPath],
    incremental: bool,
    overwrite: bool,
    post_hash: bool,
    verbose: bool,
):
    cache_file.write(rand_string())
    expected_cache_hash: str = syphon.hash.HashEntry(cache_file).hash

    assert not syphon.build(
        cache_file,
        *[],
        hash_filepath=hash_file,
        incremental=incremental,
        overwrite=overwrite,
        post_hash=post_hash,
        verbose=verbose,
    )
    assert_post_hash(False, cache_file, hash_filepath=hash_file)
    assert_captured_outerr(capsys.readouterr(), verbose, False)

    actual_cache_hash: str = syphon.hash.HashEntry(cache_file).hash
    assert expected_cache_hash == actual_cache_hash
Example #11
def params(
    dataset_path: str,
    tmpdir: LocalPath,
    categorical_features_no: Optional[str],
    numerical_features_yes: List[str],
    target_col: str,
    features_to_drop_no: Optional[str],
):

    expected_output_model_path = tmpdir.join("models.pkl")
    expected_metric_path = tmpdir.join("metrics.json")
    expected_features_transformer_path = tmpdir.join(
        "features_transformer.pkl")

    params = Params(
        report_path="",
        train_data_path=dataset_path,
        model_path=expected_output_model_path,
        features_transformer_path=expected_features_transformer_path,
        metric_path=expected_metric_path,
        splitting_params=SplittingParams(val_size=0.2, random_state=239),
        train_params=TrainingParams(model_type="RandomForestClassifier"),
        feature_params=FeatureParams(
            numerical_features=numerical_features_yes,
            categorical_features=categorical_features_no,
            target_col=target_col,
            features_to_drop=features_to_drop_no),
        inference_params=InferenceParams(source_data_path="",
                                         result_data_path=""))

    return params
Example #12
class ScriptData(object):
    def __init__(self, tmpdir):
        self.tmpdir = tmpdir
        self.tmpdata = None
        self.pristine_data = LocalPath(os.path.dirname(__file__)).join('data')
        self.installed_packages = None

    def copy_data(self):
        if self.tmpdata and self.tmpdata.exists():
            self.tmpdata.remove(ignore_errors=True)
        self.tmpdata = self.tmpdir.mkdir('data')
        self.pristine_data.copy(self.tmpdata, mode=True)

        # Can't add .git directories to the index, so the pristine tree stores
        # the directory as '.hg'; restore its real '.git' name in the copy.
        pristine_scm_dir = self.pristine_data.join('scripts/project/.hg')
        tmp_git_dir = self.tmpdata.join('scripts/project/.git')
        pristine_scm_dir.copy(tmp_git_dir)

        return self.tmpdata

    def sysexec(self, script):
        print('Executing Script: %s' % script)
        return script.sysexec(cwd=str(script.dirpath()))

    def verify_data(self):
        if not self.tmpdata:
            return False

        for prissy in self.pristine_data.visit():
            assert prissy.ext != '.pyc', \
                'Pristine has Python bytecode indicating execution from pristine directory!'

            rel = prissy.relto(self.pristine_data)
            tmp = self.tmpdata.join(rel)

            if prissy.check(dir=True):
                assert tmp.check(dir=True), 'Data integrity test failed: %s' % rel
            elif prissy.check(file=True):
                assert tmp.check(file=True), 'Data integrity test failed: %s' % rel
                assert prissy.computehash() == tmp.computehash(), 'Hash mismatch: %s' % rel

        for tmp in self.tmpdata.visit():
            if '.git' in tmp.strpath or '__pycache__' in tmp.strpath or tmp.ext == '.pyc':
                continue

            rel = tmp.relto(self.tmpdata)
            prissy = self.pristine_data.join(rel)

            if tmp.check(dir=True):
                assert prissy.check(dir=True), 'Directory created in tmpdir: %s' % rel
            elif tmp.check(file=True):
                assert prissy.check(file=True), 'File created in tmpdir: %s' % rel

        return True

    def copy_installed(self):
        if not self.installed_packages:
            self.installed_packages = installed_packages()
        return copy.deepcopy(self.installed_packages)
Example #13
def gettree(lp: LocalPath):
    assert lp.check()
    if lp.isdir():
        return {df.basename: gettree(df) for df in lp.listdir()}
    elif lp.isfile():
        return lp.read_text("utf8")
    else:
        raise Exception("not directory or file: {}".format(lp))
Example #14
def test_tmpdir(tmpdir: local.LocalPath) -> None:
    file_name: str = "hello.cpp"
    file_content: str = "#include <iostream>"
    p = tmpdir.join(file_name)
    p.write(file_content)

    assert tmpdir.join(file_name).isfile()
    assert p.read() == file_content
Example #15
    def pgdir(self):
        """Retrieve the set playground directory"""
        for parent in search_parent_directories():
            pgdir = Path(parent).join(self.pgconf['pgdir'], abs=1)
            if pgdir.check(dir=True):
                return pgdir
        raise NoPlayground("could not find any directory named '%s'" %
                           self.pgconf['pgdir'])
Example #16
    def test_increment_without_metadata_without_schema(
        self,
        capsys: CaptureFixture,
        archive_dir: LocalPath,
        archive_fixture: "TestArchive.ArchiveCacheAndHashPassthruChecker",
        schema_file: Optional[LocalPath],
        verbose: bool,
    ):
        # List of (expected frame filename, data filename) tuples
        targets: List[Tuple[str, str]] = [
            ("iris-part-1-of-6-combined.csv", "iris-part-1-of-6.csv"),
            ("iris-part-1-2.csv", "iris-part-2-of-6.csv"),
            ("iris-part-1-2-3.csv", "iris-part-3-of-6.csv"),
            ("iris-part-1-2-3-4.csv", "iris-part-4-of-6.csv"),
            ("iris-part-1-2-3-4-5.csv", "iris-part-5-of-6.csv"),
            ("iris_plus.csv", "iris-part-6-of-6.csv"),
        ]

        expected_hashfile = (
            LocalPath(archive_fixture.cache_file).dirpath(DEFAULT_HASH_FILE)
            if archive_fixture.hash_file is None
            else archive_fixture.hash_file
        )
        assert not os.path.exists(expected_hashfile)
        assert not os.path.exists(archive_fixture.cache_file)
        assert len(archive_dir.listdir()) == 0

        for expected_frame_filename, data_filename in targets:
            assert archive_fixture(
                archive_dir,
                [os.path.join(get_data_path(), data_filename)],
                cache_filepath=archive_fixture.cache_file,
                hash_filepath=archive_fixture.hash_file,
                verbose=verbose,
            )
            assert_captured_outerr(capsys.readouterr(), verbose, False)

            expected_frame = DataFrame(
                read_csv(
                    os.path.join(get_data_path(), expected_frame_filename),
                    dtype=str,
                    index_col="Index",
                ))
            del expected_frame["Species"]
            del expected_frame["PetalColor"]
            expected_frame.sort_index(inplace=True)
            actual_frame = DataFrame(
                read_csv(str(archive_fixture.cache_file),
                         dtype=str,
                         index_col="Index"))
            actual_frame.sort_index(inplace=True)
            assert_captured_outerr(capsys.readouterr(), False, False)

            assert_frame_equal(expected_frame, actual_frame)
            assert os.path.exists(expected_hashfile)
            assert syphon.check(
                archive_fixture.cache_file,
                hash_filepath=expected_hashfile,
                verbose=verbose,
            )
Example #17
def test_client_private_key_path(
        patched_contract,
        monkeypatch: MonkeyPatch,
        sender_privkey: str,
        tmpdir: LocalPath,
        web3: Web3,
        channel_manager_address: str
):
    def check_permission_safety_patched(path: str):
        return True

    monkeypatch.setattr(
        microraiden.utils.private_key,
        'check_permission_safety',
        check_permission_safety_patched
    )

    privkey_file = tmpdir.join('private_key.txt')
    privkey_file.write(sender_privkey)

    with pytest.raises(AssertionError):
        Client(
            private_key='0xthis_is_not_a_private_key',
            channel_manager_address=channel_manager_address,
            web3=web3
        )

    with pytest.raises(AssertionError):
        Client(
            private_key='0xcorrect_length_but_still_not_a_private_key_12345678901234567',
            channel_manager_address=channel_manager_address,
            web3=web3
        )

    with pytest.raises(AssertionError):
        Client(
            private_key='/nonexisting/path',
            channel_manager_address=channel_manager_address,
            web3=web3
        )

    Client(
        private_key=sender_privkey,
        channel_manager_address=channel_manager_address,
        web3=web3
    )

    Client(
        private_key=sender_privkey[2:],
        channel_manager_address=channel_manager_address,
        web3=web3
    )

    Client(
        private_key=str(tmpdir.join('private_key.txt')),
        channel_manager_address=channel_manager_address,
        web3=web3
    )
Example #18
def test_openhashfile_tell(hash_file: LocalPath):
    hash_file.write(rand_string())

    openhashfile = syphon.hash._OpenHashFile(hash_file, "")
    assert openhashfile.tell() == 0
    assert openhashfile.tell() == openhashfile._file_obj.tell()

    line = openhashfile._file_obj.readline()
    assert openhashfile._file_obj.tell() == len(line)
Example #19
File: cli.py Project: Yelp/pgctl
    def pgdir(self):
        """Retrieve the set playground directory"""
        for parent in search_parent_directories():
            pgdir = Path(parent).join(self.pgconf['pgdir'], abs=1)
            if pgdir.check(dir=True):
                return pgdir
        raise NoPlayground(
            "could not find any directory named '%s'" % self.pgconf['pgdir']
        )
Example #20
def create_subdirectories(path, amount, depth):
    path = str(path)
    for x in range(amount):
        p = LocalPath(path).join(str(x))
        p.mkdir()
        _f = p.join("testreport.xml")
        _f.write("")
        if depth != 0:
            depth -= 1
            create_subdirectories(p, 1, depth)
Example #21
def venv_update_symlink_pwd():
    # I wish I didn't need this =/
    # surely there's a better way -.-
    # NOTE: `pip install TOP` causes an infinite copyfiles loop, under tox >.<
    from venv_update import __file__ as venv_update_path, dotpy

    # symlink so that we get coverage, where possible
    venv_update_path = Path(dotpy(venv_update_path))
    local_vu = Path(venv_update_path.basename)
    local_vu.mksymlinkto(venv_update_path)
Example #23
def test_openhashfile_close(hash_file: LocalPath):
    hash_file.write(rand_string())

    openhashfile = syphon.hash._OpenHashFile(hash_file, "")

    openhashfile.close()
    try:
        assert openhashfile._file_obj.closed
    finally:
        openhashfile._file_obj.close()
Example #24
def main():
    from sys import argv
    argv = argv[1:]
    sources, destination = argv[:-1], argv[-1]

    from py._path.local import LocalPath
    sources = tuple([LocalPath(src) for src in sources])
    destination = LocalPath(destination)

    return make_sdists(sources, destination)
Example #25
def venv_update_script(pyscript, venv='venv'):
    """Run a python script that imports venv_update"""

    # symlink so that we get coverage, where possible
    venv_update_symlink_pwd()

    # write it to a file so we get more-reasonable stack traces
    testscript = Path('testscript.py')
    testscript.write(pyscript)
    return run('%s/bin/python' % venv, testscript.strpath)
Example #27
def create_subdirectories(path, amount, depth):
    path = str(path)
    for x in range(amount):
        p = LocalPath(path).join(str(depth))
        p.mkdir()
        for ext in [".a", ".b", ".c", ".d", ""]:
            _f = p.join("testfile%s" % ext)
            _f.write("")
        if depth != 0:
            depth -= 1
            create_subdirectories(p, 1, depth)
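
Traced by hand, each recursion level creates one directory named after the current depth, so a call with amount=1 nests them as root/2/1/0. A minimal check of that layout, assuming a pytest tmpdir fixture (the test name is hypothetical):

def test_create_subdirectories_layout(tmpdir):
    create_subdirectories(tmpdir, amount=1, depth=2)
    # Each level holds testfile.a through testfile.d plus a bare "testfile".
    assert tmpdir.join("2", "1", "0", "testfile.a").check(file=True)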
Example #28
File: cli.py Project: Yelp/pgctl
    def service_by_name(self, service_name):
        """Return an instantiated Service, by name."""
        if os.path.isabs(service_name):
            path = Path(service_name)
        else:
            path = self.pgdir.join(service_name, abs=1)
        return Service(
            path,
            self.pghome.join(path.relto(str('/')), abs=1),
            self.pgconf['timeout'],
        )
Example #29
def main():
    assert 'PIP_INDEX_URL' not in os.environ, os.environ['PIP_INDEX_URL']
    from sys import argv
    argv = argv[1:]
    sources, destination = argv[:-1], argv[-1]

    from py._path.local import LocalPath
    sources = tuple([LocalPath(src) for src in sources])
    destination = LocalPath(destination)

    return make_sdists(sources, destination)
Example #30
    def test_raises_valueerror_on_mismatching_hash_type(
            cache_file: LocalPath, hash_file: LocalPath):
        cache_file.write(rand_string())

        hash_file.write("")

        entry = syphon.hash.HashEntry(cache_file)
        entry.hash_type = "md5"

        with pytest.raises(ValueError, match=entry.hash_type):
            with syphon.hash.HashFile(hash_file) as hashfile:
                hashfile.update(entry)
Example #31
    def test_build_no_hash(archive_dir: LocalPath, cache_file: LocalPath):
        assert not os.path.exists(cache_file)
        assert syphon.__main__.main(_init_args(archive_dir)) == 0
        assert syphon.__main__.main(_archive_args(archive_dir)) == 0

        arguments = _build_args(archive_dir, cache_file)
        arguments.append("--no-hash")

        assert syphon.__main__.main(arguments) == 0
        assert os.path.exists(cache_file)
        assert not os.path.exists(
            cache_file.dirpath(syphon.core.check.DEFAULT_FILE))
        assert cache_file.size() > 0
Example #32
def gettree(lp: LocalPath, max_len=120):
    """
    Get a dict representing the file tree for a directory
    """
    assert lp.check()
    if lp.isdir():
        return {df.basename: gettree(df, max_len=max_len) for df in lp.listdir()}
    else:
        assert lp.isfile()
        content = lp.read_text('utf8')
        if max_len and len(content) > max_len:
            content = content[:max_len - 3] + '...'
        return content
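
A quick usage sketch for the helper above, assuming a pytest tmpdir fixture (file names are hypothetical):

def test_gettree(tmpdir):
    tmpdir.join("a.txt").write("hello")
    tmpdir.mkdir("sub").join("b.txt").write("world")
    # Directories become nested dicts; files map to their (truncated) text.
    assert gettree(tmpdir) == {"a.txt": "hello", "sub": {"b.txt": "world"}}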
Example #33
def test_hashfile_is_a_context_manager(hash_file: LocalPath):
    hash_file.write(rand_string())

    hashfile = syphon.hash.HashFile(hash_file)
    assert hashfile._count == 0
    assert hashfile._file is None

    with hashfile as _:
        assert hashfile._count == 1
        assert isinstance(hashfile._file, syphon.hash._OpenHashFile)
        assert hashfile._file._file_obj.readable()
        assert hashfile._file._file_obj.writable()

    assert hashfile._count == 0
    assert hashfile._file is None
Example #34
def test_openhashfile_init(hash_file: LocalPath, hash_type: Optional[str]):
    if hash_type is None:
        hash_type = syphon.hash.DEFAULT_HASH_TYPE

    expected_content = rand_string()
    hash_file.write(expected_content)

    openhashfile = syphon.hash._OpenHashFile(hash_file, hash_type)
    try:
        assert expected_content == openhashfile._file_obj.readline()
        assert not openhashfile._file_obj.closed
        assert openhashfile.hash_type == hash_type
        assert openhashfile.line_split is None
    finally:
        openhashfile._file_obj.close()
Example #35
def test_scripts_left_behind(tmpdir):
    tmpdir.chdir()
    requirements('')

    venv_update()

    # an arbitrary small package with a script: pep8
    script_path = Path('venv/bin/pep8')
    assert not script_path.exists()

    run('venv/bin/pip', 'install', 'pep8')
    assert script_path.exists()

    venv_update()
    assert not script_path.exists()
Example #36
def test_scripts_left_behind(tmpdir):
    tmpdir.chdir()
    get_scenario('trivial')

    venv_update()

    # an arbitrary small package with a script: pep8
    script_path = Path('virtualenv_run/bin/pep8')
    assert not script_path.exists()

    pip('install', 'pep8')
    assert script_path.exists()

    venv_update()
    assert not script_path.exists()
Example #37
def test_scripts_left_behind(tmpdir):
    tmpdir.chdir()
    requirements('')

    venv_update()

    # an arbitrary small package with a script: pep8
    script_path = Path('virtualenv_run/bin/pep8')
    assert not script_path.exists()

    run('virtualenv_run/bin/pip', 'install', 'pep8')
    assert script_path.exists()

    venv_update()
    assert not script_path.exists()
Example #38
def mergesort(filename, output=None, key=None, maxitems=1e6, progress=True):
    """Given an input file sort it by performing a merge sort on disk.

    :param filename: Either a filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
    :type filename:  ``str`` or ``py._path.local.LocalPath``

    :param output: An optional output filename as a ``str`` or a ``py._path.local.LocalPath`` instance.
    :type output:  ``str`` or ``py._path.local.LocalPath`` or ``None``

    :param key: An optional key to sort the data on.
    :type key:  ``function`` or ``None``

    :param maxitems: Maximum number of items to hold in memory at a time.
    :type maxitems:  ``int``

    :param progress: Whether or not to display a progress bar
    :type progress: ``bool``

    This uses ``py._path.local.LocalPath.make_numbered_dir`` to create temporary scratch space to
    work with when splitting the input file into sorted chunks. The merge sort is then processed
    iteratively in memory using the ``~merge`` function, which is almost identical to
    ``~heapq.merge`` but adds support for an optional key function.
    """

    p = filename if isinstance(filename, LocalPath) else LocalPath(filename)
    output = p if output is None else output
    key = key if key is not None else lambda x: x

    scratch = LocalPath.make_numbered_dir(prefix="mergesort-")

    nlines = sum(1 for line in p.open("r"))

    # Compute a reasonable chunksize < maxitems
    chunksize = first(ifilter(lambda x: x < maxitems, imap(lambda x: nlines / (2**x), count(1))))

    # Split the file up into n sorted files
    if progress:
        bar = ProgressBar("Split/Sorting Data", max=(nlines / chunksize))
    for i, items in enumerate(ichunks(chunksize, jsonstream(p))):
        with scratch.ensure("{0:d}.json".format(i)).open("w") as f:
            f.write("\n".join(map(dumps, sorted(items, key=key))))
        if progress:
            bar.next()
    if progress:
        bar.finish()

    q = scratch.listdir("*.json")
    with output.open("w") as f:
        if progress:
            bar = ProgressBar("Merge/Sorting Data", max=nlines)
        for item in merge(*imap(jsonstream, q)):
            f.write("{0:s}\n".format(dumps(item)))
            if progress:
                bar.next()
        if progress:
            bar.finish()
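
A minimal call sketch for mergesort, with hypothetical file names; it assumes a JSON-lines input where every line decodes to an object with a "timestamp" field:

mergesort(
    "events.jsonl",                      # input, as str or LocalPath
    output="events.sorted.jsonl",        # omit to sort the file in place
    key=lambda item: item["timestamp"],  # sort key applied to decoded items
    maxitems=500000,                     # upper bound on items held in memory
    progress=False,                      # suppress the progress bars
)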
Example #39
def assert_timestamps(*reqs):
    firstreq = Path(reqs[0])
    lastreq = Path(reqs[-1])
    args = ['install='] + sum([['-r', req] for req in reqs], [])

    venv_update(*args)

    assert firstreq.mtime() < Path('venv').mtime()

    # garbage, to cause a failure
    lastreq.write('-w wat')

    with pytest.raises(CalledProcessError) as excinfo:
        venv_update(*args)

    assert excinfo.value.returncode == 1
    assert firstreq.mtime() > Path('venv').mtime()

    # blank requirements should succeed
    lastreq.write('')

    venv_update(*args)
    assert firstreq.mtime() < Path('venv').mtime()
Example #40
def assert_timestamps(*reqs):
    firstreq = Path(reqs[0])
    lastreq = Path(reqs[-1])

    venv_update('--python=python', 'virtualenv_run', *reqs)

    assert firstreq.mtime() < Path('virtualenv_run').mtime()

    # garbage, to cause a failure
    lastreq.write('-w wat')

    from subprocess import CalledProcessError
    with pytest.raises(CalledProcessError) as excinfo:
        venv_update('virtualenv_run', *reqs)

    assert excinfo.value.returncode == 1
    assert firstreq.mtime() > Path('virtualenv_run').mtime()

    # blank requirements should succeed
    lastreq.write('')

    venv_update('virtualenv_run', *reqs)
    assert Path(reqs[0]).mtime() < Path('virtualenv_run').mtime()
Example #41
def install_coverage(venv='venv'):
    venv = Path(venv)
    if not venv.exists():
        run('virtualenv', str(venv))
    run(str(venv.join('bin/python')), '-m', 'pip.__main__', 'install', '-r', str(COVERAGE_REQS))
Example #42
# NOTE WELL: No side-effects are allowed in __init__ files. This means you!
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

import os
from re import compile as Regex
from re import MULTILINE

from pip._internal.wheel import Wheel
from py._path.local import LocalPath as Path

TOP = Path(__file__) / '../../..'
COVERAGE_REQS = TOP.join('requirements.d/coverage.txt')


def requirements(reqs, path='requirements.txt'):
    """Write a requirements.txt file to the current working directory."""
    Path(path).write(reqs)


def run(*cmd, **env):
    if env:
        from os import environ
        tmp = env
        env = environ.copy()
        env.update(tmp)
    else:
        env = None

    from .capture_subprocess import capture_subprocess
Example #43
def it_removes_down_file():
    path = Path(os.getcwd()).join('playground/slow-startup/down')
    path.ensure()
    assert path.check()
    it_can_succeed()
    assert not path.check()