def classify_bugs(model_name, classifier, bug_id):
    if classifier != "default":
        assert (
            model_name in MODELS_WITH_TYPE
        ), f"{classifier} is not a valid classifier type for {model_name}"

        model_file_name = f"{model_name}{classifier}model"
        model_name = f"{model_name}_{classifier}"
    else:
        model_file_name = f"{model_name}model"

    if not os.path.exists(model_file_name):
        logger.info(f"{model_file_name} does not exist. Downloading the model...")
        try:
            download_check_etag(
                f"https://index.taskcluster.net/v1/task/project.relman.bugbug.train_{model_name}.latest/artifacts/public/{model_file_name}.zst",
                f"{model_file_name}.zst",
            )
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

        zstd_decompress(model_file_name)
        assert os.path.exists(model_file_name), "Decompressed file doesn't exist"

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    if bug_id:
        bugs = bugzilla.get(bug_id).values()
        assert bugs, f"A bug with a bug id of {bug_id} was not found"
    else:
        bugs = bugzilla.get_bugs()

    for bug in bugs:
        print(
            f'https://bugzilla.mozilla.org/show_bug.cgi?id={bug["id"]} - {bug["summary"]}'
        )

        if model.calculate_importance:
            probas, importance = model.classify(
                bug, probabilities=True, importances=True
            )

            model.print_feature_importances(
                importance["importances"], class_probabilities=probas
            )

            with open("importance.html", "w") as f:
                f.write(importance["html"])
        else:
            probas = model.classify(bug, probabilities=True, importances=False)

        if np.argmax(probas) == 1:
            print(f"Positive! {probas}")
        else:
            print(f"Negative! {probas}")

        # Wait for a key press before classifying the next bug.
        input()
def download_model(model_url, file_path):
    logger.info(f"Downloading model from {model_url!r} and saving it in {file_path!r}")
    download_check_etag(model_url)
    zstd_decompress(file_path)
    logger.info(f"Written model in {file_path}")
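# Illustrative call (not from the original source; the URL and file name are
# hypothetical): model_url points at the compressed .zst artifact, while
# file_path is the name of the decompressed model on disk.
download_model(
    "https://example.com/artifacts/defectmodel.zst",  # hypothetical URL
    "defectmodel",  # hypothetical local file name
)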
def __init__(self, model_name, repo_dir, git_repo_dir, method_defect_predictor_dir):
    self.model_name = model_name
    self.repo_dir = repo_dir

    self.model = download_and_load_model(model_name)
    assert self.model is not None

    self.git_repo_dir = git_repo_dir
    if git_repo_dir:
        self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)

    self.method_defect_predictor_dir = method_defect_predictor_dir
    if method_defect_predictor_dir:
        self.clone_git_repo(
            "https://github.com/lucapascarella/MethodDefectPredictor",
            method_defect_predictor_dir,
            "8cc47f47ffb686a29324435a0151b5fabd37f865",
        )

    if model_name == "regressor":
        self.use_test_history = False

        model_data_X_path = f"{model_name}model_data_X"
        updated = download_check_etag(
            URL.format(model_name=model_name, file_name=f"{model_data_X_path}.zst")
        )
        if updated:
            zstd_decompress(model_data_X_path)
        assert os.path.exists(model_data_X_path), "Decompressed X dataset doesn't exist"

        model_data_y_path = f"{model_name}model_data_y"
        updated = download_check_etag(
            URL.format(model_name=model_name, file_name=f"{model_data_y_path}.zst")
        )
        if updated:
            zstd_decompress(model_data_y_path)
        assert os.path.exists(model_data_y_path), "Decompressed y dataset doesn't exist"

        self.X = to_array(joblib.load(model_data_X_path))
        self.y = to_array(joblib.load(model_data_y_path))

        past_bugs_by_function_path = "data/past_bugs_by_function.pickle"
        download_check_etag(
            PAST_BUGS_BY_FUNCTION_URL, path=f"{past_bugs_by_function_path}.zst"
        )
        zstd_decompress(past_bugs_by_function_path)
        assert os.path.exists(past_bugs_by_function_path)

        with open(past_bugs_by_function_path, "rb") as f:
            self.past_bugs_by_function = pickle.load(f)

    if model_name == "testlabelselect":
        self.use_test_history = True

        assert db.download_support_file(
            test_scheduling.TEST_LABEL_SCHEDULING_DB,
            test_scheduling.PAST_FAILURES_LABEL_DB,
        )
        self.past_failures_data = test_scheduling.get_past_failures("label")

        self.testfailure_model = download_and_load_model("testfailure")
        assert self.testfailure_model is not None
def main(args):
    model_file_name = f"{similarity.model_name_to_class[args.algorithm].__name__.lower()}.similaritymodel"

    if not os.path.exists(model_file_name):
        logger.info(f"{model_file_name} does not exist. Downloading the model...")
        try:
            download_check_etag(URL.format(model_file_name))
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

        zstd_decompress(model_file_name)
        assert os.path.exists(model_file_name), "Decompressed file doesn't exist"

    model = similarity.model_name_to_class[args.algorithm].load(model_file_name)

    bug_ids = model.get_similar_bugs(bugzilla.get(args.bug_id)[args.bug_id])

    bugs = {}
    for bug in bugzilla.get_bugs():
        if bug["id"] in bug_ids or bug["id"] == args.bug_id:
            bugs[bug["id"]] = bug

    print("{}: {}".format(args.bug_id, bugs[args.bug_id]["summary"]))
    for bug_id in bug_ids:
        print("{}: {}".format(bug_id, bugs[bug_id]["summary"]))
def test_download_check_etag_unchanged():
    url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug/prova.txt"

    responses.add(
        responses.HEAD,
        url,
        status=200,
        headers={"ETag": "123", "Last-Modified": "2019-04-16"},
    )

    responses.add(responses.GET, url, status=200, body="prova")

    responses.add(
        responses.HEAD,
        url,
        status=200,
        headers={"ETag": "123", "Last-Modified": "2019-04-16"},
    )

    responses.add(responses.GET, url, status=200, body="prova2")

    utils.download_check_etag(url, "prova.txt")

    assert os.path.exists("prova.txt")
    with open("prova.txt", "r") as f:
        assert f.read() == "prova"

    # The ETag is unchanged, so the second download is skipped and the file
    # keeps its original contents ("prova", not "prova2").
    utils.download_check_etag(url, "prova.txt")

    assert os.path.exists("prova.txt")
    with open("prova.txt", "r") as f:
        assert f.read() == "prova"
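# A rough sketch of the caching behavior the test above exercises. This is an
# assumption inferred from the test, not the actual utils.download_check_etag
# implementation: compare the remote ETag against the one saved alongside the
# file, and skip the GET when they match.
import os

import requests


def download_if_etag_changed(url: str, path: str) -> bool:
    head = requests.head(url, allow_redirects=True)
    head.raise_for_status()
    etag = head.headers.get("ETag")

    etag_path = f"{path}.etag"
    if os.path.exists(path) and os.path.exists(etag_path):
        with open(etag_path, "r") as f:
            if f.read() == etag:
                # Unchanged since the last download: keep the existing file.
                return False

    response = requests.get(url)
    response.raise_for_status()
    with open(path, "wb") as f:
        f.write(response.content)

    if etag is not None:
        with open(etag_path, "w") as f:
            f.write(etag)

    # Downloaded (or re-downloaded after an ETag change).
    return True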
def test_download_check_missing():
    url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug/prova.txt"

    responses.add(
        responses.HEAD,
        url,
        status=404,
        headers={"ETag": "123", "Last-Modified": "2019-04-16"},
    )

    responses.add(
        responses.GET, url, status=404, body=requests.exceptions.HTTPError("HTTP error")
    )

    url_fallback = url.replace(
        "https://community-tc.services.mozilla.com/api/index",
        "https://index.taskcluster.net",
    )

    responses.add(
        responses.HEAD,
        url_fallback,
        status=404,
        headers={"ETag": "123"},
    )

    responses.add(
        responses.GET,
        url_fallback,
        status=404,
        body=requests.exceptions.HTTPError("HTTP error"),
    )

    with pytest.raises(requests.exceptions.HTTPError, match="HTTP error"):
        utils.download_check_etag(url, "prova.txt")

    assert not os.path.exists("prova.txt")
def download_model(model_name):
    if not os.path.exists(f"{model_name}model"):
        url = BASE_URL.format(model_name=model_name)
        logger.info(f"Downloading {url}...")
        download_check_etag(url, f"{model_name}model.zst")
        zstd_decompress(f"{model_name}model")
        assert os.path.exists(f"{model_name}model"), "Decompressed file doesn't exist"
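# Illustrative usage, assuming BASE_URL is this module's artifact URL template
# and "defect" is a trained model name: the first call downloads and
# decompresses "defectmodel"; later calls find the file and do nothing.
download_model("defect")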
def test_download_check_etag_fallback(tmp_path):
    url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug/prova.txt"

    responses.add(
        responses.HEAD,
        url,
        status=404,
        headers={"ETag": "123", "Last-Modified": "2019-04-16"},
    )

    responses.add(
        responses.GET, url, status=404, body=requests.exceptions.HTTPError("HTTP error")
    )

    url_fallback = url.replace(
        "https://community-tc.services.mozilla.com/api/index",
        "https://index.taskcluster.net",
    )

    responses.add(
        responses.HEAD,
        url_fallback,
        status=200,
        headers={"ETag": "123"},
    )

    responses.add(responses.GET, url_fallback, status=200, body="prova")

    utils.download_check_etag(url, "prova.txt")

    assert os.path.exists("prova.txt")
    with open("prova.txt", "r") as f:
        assert f.read() == "prova"
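# A rough sketch of the host fallback the two 404 tests above exercise; again
# an assumption based on the tests rather than the real implementation. When
# the community-tc URL fails, the same artifact is retried on the legacy
# index.taskcluster.net host before giving up.
def download_with_fallback(url: str, path: str) -> bool:
    try:
        return download_if_etag_changed(url, path)  # sketch defined above
    except requests.exceptions.HTTPError:
        fallback_url = url.replace(
            "https://community-tc.services.mozilla.com/api/index",
            "https://index.taskcluster.net",
        )
        # If the fallback 404s too, this re-raises and no file is written,
        # matching test_download_check_missing.
        return download_if_etag_changed(fallback_url, path)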
def _download_past_bugs(url: str) -> dict:
    path = os.path.join("data", os.path.basename(url)[:-4])
    download_check_etag(url, path=f"{path}.zst")
    zstd_decompress(path)
    assert os.path.exists(path)
    with open(path, "r") as f:
        return json.load(f)
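# Illustrative usage (the URL is hypothetical): the [:-4] slice above strips
# the ".zst" suffix, so the URL must point at a .zst-compressed JSON file.
past_bugs = _download_past_bugs("https://example.com/past_fixed_bugs.json.zst")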
def retrieve_push_data(self):
    # Download previous cache.
    cache_path = os.path.abspath("data/adr_cache")
    if not os.path.exists(cache_path):
        cache_available = True
        try:
            download_check_etag(ADR_CACHE_URL, "adr_cache.tar.xz")
        except requests.exceptions.HTTPError:
            logger.info("The adr cache is not available yet, trying fallback...")
            try:
                download_check_etag(OLD_ADR_CACHE_URL, "adr_cache.tar.xz")
            except requests.exceptions.HTTPError:
                logger.info("The adr cache is not available yet")
                cache_available = False

        if cache_available:
            with tarfile.open("adr_cache.tar.xz", "r:xz") as tar:
                tar.extractall()
            assert os.path.exists("data/adr_cache"), "Decompressed adr cache doesn't exist"

    # Set up the adr cache configuration.
    os.makedirs(os.path.expanduser("~/.config/adr"), exist_ok=True)
    with open(os.path.expanduser("~/.config/adr/config.toml"), "w") as f:
        f.write(
            f"""[adr.cache.stores]
file = {{ driver = "file", path = "{cache_path}" }}
"""
        )

    # We'll use the past TRAINING_MONTHS months only for training the model,
    # but we use 3 months more than that to calculate the failure statistics.
    subprocess.run(
        [
            "run-adr",
            "ahal/ci-recipes",
            "recipe",
            "-o",
            os.path.abspath("push_data.json"),
            "-f",
            "json",
            "push_data",
            "--",
            "--from",
            f"today-{TRAINING_MONTHS + 3}month",
            "--to",
            "today-2day",
            "--branch",
            "autoland",
        ],
        check=True,
        stdout=subprocess.DEVNULL,  # Redirect to /dev/null, as the logs are too big otherwise.
    )

    with tarfile.open("data/adr_cache.tar.xz", "w:xz") as tar:
        tar.add("data/adr_cache")

    zstd_compress("push_data.json")
def download_support_file(path, file_name):
    url = urljoin(DATABASES[path]["url"], file_name)
    path = os.path.join(os.path.dirname(path), file_name)
    print(f"Downloading {url} to {path}")
    utils.download_check_etag(url, path)
    if path.endswith(".xz"):
        extract_file(path[:-3])
def __init__(self, model_name, cache_root, git_repo_dir, method_defect_predictor_dir):
    self.model_name = model_name
    self.cache_root = cache_root

    assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
    self.repo_dir = os.path.join(cache_root, "mozilla-central")

    self.model = self.load_model(model_name)
    assert self.model is not None

    self.git_repo_dir = git_repo_dir
    if git_repo_dir:
        self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)

    self.method_defect_predictor_dir = method_defect_predictor_dir
    if method_defect_predictor_dir:
        self.clone_git_repo(
            "https://github.com/lucapascarella/MethodDefectPredictor",
            method_defect_predictor_dir,
            "fa5269b959d8ddf7e97d1e92523bb64c17f9bbcd",
        )

    if model_name == "regressor":
        self.use_test_history = False

        model_data_X_path = f"{model_name}model_data_X"
        if not os.path.exists(model_data_X_path):
            download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_X_path}.zst")
            )
            zstd_decompress(model_data_X_path)
            assert os.path.exists(model_data_X_path), "Decompressed X dataset doesn't exist"

        model_data_y_path = f"{model_name}model_data_y"
        if not os.path.exists(model_data_y_path):
            download_check_etag(
                URL.format(model_name=model_name, file_name=f"{model_data_y_path}.zst")
            )
            zstd_decompress(model_data_y_path)
            assert os.path.exists(model_data_y_path), "Decompressed y dataset doesn't exist"

        self.X = to_array(joblib.load(model_data_X_path))
        self.y = to_array(joblib.load(model_data_y_path))

    if model_name == "testselect":
        self.use_test_history = True

        assert db.download_support_file(
            test_scheduling.TEST_SCHEDULING_DB, test_scheduling.PAST_FAILURES_DB
        )
        self.past_failures_data = test_scheduling.get_past_failures()

        self.backout_model = self.load_model("backout")
        assert self.backout_model is not None
def load_model(self, model_name):
    model_path = f"{model_name}model"
    if not os.path.exists(model_path):
        download_check_etag(
            URL.format(model_name=model_name, file_name=f"{model_path}.zst")
        )
        zstd_decompress(model_path)
        assert os.path.exists(model_path), "Decompressed model doesn't exist"

    return get_model_class(model_name).load(model_path)
def get_labels(file_name):
    path = os.path.join(get_labels_dir(), f"{file_name}.csv")

    if not os.path.exists(path) and file_name in LABELS_URLS:
        utils.download_check_etag(LABELS_URLS[file_name], path)

    with open(path, "r") as f:
        reader = csv.reader(f)
        next(reader)  # Skip the header row.
        yield from reader
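# Illustrative usage ("bug_nobug" is a hypothetical label file name): the
# generator yields CSV rows with the header row already skipped.
for label_row in get_labels("bug_nobug"):
    print(label_row)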
def download_model(model_name):
    if not os.path.exists(f"{model_name}model"):
        url = BASE_URL.format(model_name=model_name)
        logger.info(f"Downloading {url}...")
        download_check_etag(url, f"{model_name}model.zst")

        dctx = zstandard.ZstdDecompressor()
        with open(f"{model_name}model.zst", "rb") as input_f:
            with open(f"{model_name}model", "wb") as output_f:
                dctx.copy_stream(input_f, output_f)

        assert os.path.exists(f"{model_name}model"), "Decompressed file doesn't exist"
def __init__(self, cache_root):
    self.cache_root = cache_root

    assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
    self.repo_dir = os.path.join(cache_root, "mozilla-central")

    if not os.path.exists("regressormodel"):
        download_check_etag(URL, "regressormodel.zst")
        zstd_decompress("regressormodel")
        assert os.path.exists("regressormodel"), "Decompressed file doesn't exist"

    self.model = RegressorModel.load("regressormodel")
def download_support_file(path, file_name):
    try:
        url = urljoin(DATABASES[path]["url"], file_name)
        path = os.path.join(os.path.dirname(path), file_name)
        print(f"Downloading {url} to {path}")
        utils.download_check_etag(url, path)

        if path.endswith(".zst"):
            extract_file(path)
    except requests.exceptions.HTTPError:
        print(f"{file_name} is not yet available to download for {path}")
def download_component_mapping():
    global path_to_component

    utils.download_check_etag(
        "https://index.taskcluster.net/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components.json",
        "data/component_mapping.json",
    )

    with open("data/component_mapping.json", "r") as f:
        path_to_component = json.load(f)

    path_to_component = {
        path: "::".join(component) for path, component in path_to_component.items()
    }
def retrieve_test_scheduling_history(self):
    os.makedirs("data", exist_ok=True)

    # Download previous cache.
    cache_path = os.path.abspath("data/adr_cache")
    if not os.path.exists(cache_path):
        try:
            download_check_etag(URL, "data/adr_cache.tar.xz")
            with tarfile.open("data/adr_cache.tar.xz", "r:xz") as tar:
                tar.extractall()
            assert os.path.exists("data/adr_cache"), "Decompressed adr cache doesn't exist"
        except requests.exceptions.HTTPError:
            logger.info("The adr cache is not available yet")

    # Set up the adr cache configuration.
    os.makedirs(os.path.expanduser("~/.config/adr"), exist_ok=True)
    with open(os.path.expanduser("~/.config/adr/config.toml"), "w") as f:
        f.write(
            f"""[adr.cache.stores]
file = {{ driver = "file", path = "{cache_path}" }}
"""
        )

    # TODO: Increase timespan when https://github.com/ahal/ci-recipes/issues/6 is fixed.
    subprocess.run(
        [
            "run-adr",
            "ahal/ci-recipes",
            "recipe",
            "-o",
            os.path.abspath("data/test_scheduling_history.json"),
            "-f",
            "json",
            "push_data",
            "--",
            "--from",
            "today-3month",
            "--to",
            "today-2day",
            "--branch",
            "autoland",
        ],
        check=True,
    )

    zstd_compress("data/test_scheduling_history.json")

    with tarfile.open("data/adr_cache.tar.xz", "w:xz") as tar:
        tar.add("data/adr_cache")
def download_component_mapping():
    path_to_component = get_component_mapping(False)

    utils.download_check_etag(
        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components.json",
        "data/component_mapping.json",
    )

    with open("data/component_mapping.json", "r") as f:
        data = json.load(f)

    for path, component in data.items():
        path_to_component[path.encode("utf-8")] = "::".join(component).encode("utf-8")

    close_component_mapping()
def __init__(self, cache_root):
    self.cache_root = cache_root

    assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
    self.repo_dir = os.path.join(cache_root, "mozilla-central")

    if not os.path.exists("regressormodel"):
        download_check_etag(URL, "regressormodel.zst")

        dctx = zstandard.ZstdDecompressor()
        with open("regressormodel.zst", "rb") as input_f:
            with open("regressormodel", "wb") as output_f:
                dctx.copy_stream(input_f, output_f)

        assert os.path.exists("regressormodel"), "Decompressed file doesn't exist"

    self.model = RegressorModel.load("regressormodel")
def download(path, support_files_too=False, extract=True):
    # If a DB with the current schema is not available yet, we can't download.
    if is_different_schema(path):
        return False

    zst_path = f"{path}.zst"

    url = DATABASES[path]["url"]
    try:
        logger.info(f"Downloading {url} to {zst_path}")
        updated = utils.download_check_etag(url, zst_path)

        if extract and updated:
            utils.extract_file(zst_path)
            os.remove(zst_path)

        successful = True
        if support_files_too:
            for support_file in DATABASES[path]["support_files"]:
                # All support file downloads must succeed for the overall
                # download to be considered successful.
                successful &= download_support_file(path, support_file, extract)

        return successful
    except requests.exceptions.HTTPError:
        logger.info(f"{url} is not yet available to download", exc_info=True)
        return False
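# Illustrative usage, assuming BUGS_DB is a path registered in DATABASES:
# download() returns False when no artifact with the current schema exists.
if not download(BUGS_DB, support_files_too=True):
    print("DB not available yet; it needs to be generated locally.")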
def download_component_mapping():
    global path_to_component

    if path_to_component is not None:
        return

    utils.download_check_etag(
        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components.json",
        "data/component_mapping.json",
    )

    with open("data/component_mapping.json", "r") as f:
        path_to_component = json.load(f)

    path_to_component = {
        path: "::".join(component) for path, component in path_to_component.items()
    }
def download(path, force=False, support_files_too=False):
    if os.path.exists(path) and not force:
        return

    xz_path = f"{path}.xz"

    # Only download if the xz file is not there yet.
    if not os.path.exists(xz_path) or force:
        url = DATABASES[path]["url"]
        print(f"Downloading {url} to {xz_path}")
        utils.download_check_etag(url, xz_path)

    extract_file(path)

    if support_files_too:
        for support_file in DATABASES[path]["support_files"]:
            download_support_file(path, support_file)
def download_coverage_mapping() -> None:
    commit_to_coverage = get_coverage_mapping(False)

    utils.download_check_etag(
        "https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/project.relman.code-coverage.production.cron.latest/artifacts/public/commit_coverage.json.zst",
        "data/coverage_mapping.json.zst",
    )

    zstd_decompress("data/coverage_mapping.json")
    assert os.path.exists("data/coverage_mapping.json")

    with open("data/coverage_mapping.json", "r") as f:
        data = json.load(f)

    for commit_hash, commit_stats in data.items():
        commit_to_coverage[commit_hash.encode("utf-8")] = pickle.dumps(commit_stats)

    close_coverage_mapping()
def download_similarity_model(model_name):
    path = f"{model_name_to_class[model_name].__name__.lower()}.similaritymodel"
    url = f"https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.train_similarity.latest/artifacts/public/{path}.zst"
    logger.info(f"Downloading similarity model from {url}...")
    updated = download_check_etag(url)
    if updated:
        zstd_decompress(path)
        os.remove(f"{path}.zst")
    assert os.path.exists(path), "Decompressed file doesn't exist"
    return path
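# Illustrative usage ("neighbors_tfidf" is assumed to be a key in
# model_name_to_class): the returned path is the decompressed model file.
model_path = download_similarity_model("neighbors_tfidf")
model = model_name_to_class["neighbors_tfidf"].load(model_path)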
def __init__(self, cache_root):
    self.cache_root = cache_root

    assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
    self.repo_dir = os.path.join(cache_root, "mozilla-central")

    if not os.path.exists("regressormodel"):
        download_check_etag(URL, "regressormodel.zst")
        zstd_decompress("regressormodel")
        assert os.path.exists("regressormodel"), "Decompressed model doesn't exist"

    if not os.path.exists("regressormodel_data_X"):
        download_check_etag(URL, "regressormodel_data_X.zst")
        zstd_decompress("regressormodel_data_X")
        assert os.path.exists("regressormodel_data_X"), "Decompressed X dataset doesn't exist"

    if not os.path.exists("regressormodel_data_y"):
        download_check_etag(URL, "regressormodel_data_y.zst")
        zstd_decompress("regressormodel_data_y")
        assert os.path.exists("regressormodel_data_y"), "Decompressed y dataset doesn't exist"

    self.model = RegressorModel.load("regressormodel")

    # We use "clean" commits as the background dataset for feature importance.
    # This way, we can see the features which are most important in differentiating
    # the current commit from the "clean" commits.
    X = joblib.load("regressormodel_data_X")
    y = joblib.load("regressormodel_data_y")
    self.background_dataset = X[y == 0]
def download(path, force=False, support_files_too=False):
    if os.path.exists(path) and not force:
        return

    zst_path = f"{path}.zst"

    # Only download if the file is not there yet.
    if not os.path.exists(zst_path) or force:
        url = DATABASES[path]["url"]
        try:
            print(f"Downloading {url} to {zst_path}")
            utils.download_check_etag(url, zst_path)
        except requests.exceptions.HTTPError:
            print(f"{url} is not yet available to download")
            return

    extract_file(zst_path)

    if support_files_too:
        for support_file in DATABASES[path]["support_files"]:
            download_support_file(path, support_file)
def download(path, force=False, support_files_too=False):
    if os.path.exists(path) and not force:
        return

    zst_path = f"{path}.zst"
    xz_path = f"{path}.xz"

    # Only download if the file is not there yet.
    if (not os.path.exists(zst_path) and not os.path.exists(xz_path)) or force:
        url = DATABASES[path]["url"]
        try:
            path_compressed = zst_path
            print(f"Downloading {url} to {path_compressed}")
            utils.download_check_etag(url, path_compressed)
        except requests.exceptions.HTTPError:
            try:
                url_xz = f"{os.path.splitext(url)[0]}.xz"
                path_compressed = xz_path
                print(f"Downloading {url_xz} to {path_compressed} instead")
                utils.download_check_etag(url_xz, path_compressed)
            except requests.exceptions.HTTPError:
                print(f"{url} is not yet available to download")
                raise
    else:
        if os.path.exists(zst_path) or not os.path.exists(xz_path):
            path_compressed = zst_path
        else:
            path_compressed = xz_path

    extract_file(path_compressed)

    if support_files_too:
        for support_file in DATABASES[path]["support_files"]:
            download_support_file(path, support_file)
def download_support_file(path, file_name):
    # If a DB with the current schema is not available yet, we can't download.
    if is_old_schema(path):
        return False

    try:
        url = urljoin(DATABASES[path]["url"], file_name)
        path = os.path.join(os.path.dirname(path), file_name)
        logger.info(f"Downloading {url} to {path}")
        updated = utils.download_check_etag(url, path)

        if updated and path.endswith(".zst"):
            extract_file(path)

        return True
    except requests.exceptions.HTTPError:
        logger.info(
            f"{file_name} is not yet available to download for {path}", exc_info=True
        )
        return False