Ejemplo n.º 1
0
def read_file_or_url(in_file_or_url, path_searcher=None, encoding='utf-8', save_to_path=None, checksum=None):
    """ Read and return the contents of a local file or a remote URL.

        If save_to_path already holds data matching checksum, downloading is
        skipped and the cached file is read instead. After a successful read,
        the contents are also cached in save_to_path, if given.

        :param in_file_or_url: local path or URL to read
        :param path_searcher: optional object whose find_file() locates local files
        :param encoding: text encoding for local files; None means read as binary
        :param save_to_path: optional cache path for the contents
        :param checksum: expected checksum of save_to_path, used to decide whether to (re)download
        :return: the contents as unicode text
        :raises FileNotFoundError: when a local file cannot be located
    """
    need_to_download = not utils.check_file_checksum(save_to_path, checksum)
    if not need_to_download:
        # if save_to_path contains the correct data just read it by recursively
        # calling read_file_or_url
        return read_file_or_url(save_to_path, encoding=encoding)
    match = protocol_header_re.match(in_file_or_url)
    if not match:  # it's a local file
        local_file_path = in_file_or_url
        if path_searcher is not None:
            local_file_path = path_searcher.find_file(local_file_path)
        if local_file_path:
            if 'Win' in utils.get_current_os_names():
                local_file_path = os.path.abspath(local_file_path)
            else:
                local_file_path = os.path.realpath(local_file_path)
        else:
            raise FileNotFoundError("Could not locate local file", local_file_path)
        # bug fix: use a context manager so the file descriptor is always
        # closed; previously fd was opened but never closed (leak)
        if encoding is None:
            open_kwargs = {"mode": "rb"}
        else:
            open_kwargs = {"mode": "r", "encoding": encoding}
        with open(local_file_path, **open_kwargs) as fd:
            buffer = fd.read()
    else:
        session = pyinstl.connectionBase.connection_factory().get_session(in_file_or_url)
        response = session.get(in_file_or_url, timeout=(33.05, 180.05))
        response.raise_for_status()
        buffer = response.text
    buffer = utils.unicodify(buffer)  # make sure text is unicode
    if save_to_path and in_file_or_url != save_to_path:
        with open(save_to_path, "w") as wfd:
            wfd.write(buffer)
    return buffer
Ejemplo n.º 2
0
def download_and_cache_file_or_url(in_url, cache_folder, translate_url_callback=None, expected_checksum=None):
    """ Download in_url into cache_folder, returning the cached file's path.

        When expected_checksum is given and the cached copy already matches it,
        the download is skipped entirely; otherwise the file is (re)downloaded.

        :param in_url: url (or local path) to fetch
        :param cache_folder: directory to cache the download in; created if missing
        :param translate_url_callback: optional callback passed through to read_from_file_or_url
        :param expected_checksum: optional checksum; also used as the cached file's name
        :return: path of the downloaded (or already cached) file
    """

    # a plain file occupying the cache folder's path is bogus - clear it away
    if os.path.isfile(cache_folder):
        safe_remove_file(cache_folder)
    if not os.path.isdir(cache_folder):
        os.makedirs(cache_folder, exist_ok=True)

    # cache under the checksum when we have one, otherwise under the url's leaf name
    url_file_name = last_url_item(in_url)
    name_in_cache = expected_checksum or url_file_name
    cached_file_path = os.path.join(cache_folder, name_in_cache)

    if expected_checksum is None:
        # without a checksum no cached copy can be trusted - force a download
        safe_remove_file(cached_file_path)

    if os.path.isfile(cached_file_path):
        # a cached copy exists - keep it only if its checksum matches
        if not utils.check_file_checksum(cached_file_path, expected_checksum):
            safe_remove_file(cached_file_path)

    if not os.path.isfile(cached_file_path):
        # nothing usable in the cache - fetch and store the raw bytes
        downloaded_bytes = read_from_file_or_url(in_url, translate_url_callback, expected_checksum, encoding=None)
        if downloaded_bytes:
            with open(cached_file_path, "wb") as wfd:
                make_open_file_read_write_for_all(wfd)
                wfd.write(downloaded_bytes)
    return cached_file_path
Ejemplo n.º 3
0
    def read_include_node(self, i_node, *args, **kwargs):
        """ Recursively process an 'include' yaml node.

            A scalar node is a path/url of another yaml file to read; a
            sequence is processed element by element; a mapping with a "url"
            key is downloaded (optionally verified against a "checksum" key)
            and read, after which any "copy" destinations are queued as batch
            copy commands.

            :raises FileNotFoundError, urllib.error.URLError: if the url cannot
                be fetched and kwargs['ignore_if_not_exist'] is not set
        """
        if i_node.isScalar():
            kwargs['original-path-to-file'] = i_node.value
            resolved_file_name = config_vars.resolve_str(i_node.value)
            self.read_yaml_file(resolved_file_name, *args, **kwargs)
        elif i_node.isSequence():
            # a list of includes - handle each element recursively
            for sub_i_node in i_node:
                self.read_include_node(sub_i_node, *args, **kwargs)
        elif i_node.isMapping():
            if "url" in i_node:
                file_was_downloaded_and_read = False
                kwargs['original-path-to-file'] = i_node["url"].value
                resolved_file_url = config_vars.resolve_str(
                    i_node["url"].value)
                expected_checksum = None
                if "checksum" in i_node:
                    expected_checksum = config_vars.resolve_str(
                        i_node["checksum"].value)

                try:
                    file_path = utils.download_from_file_or_url(
                        in_url=resolved_file_url,
                        config_vars=config_vars,
                        in_target_path=None,
                        translate_url_callback=connectionBase.translate_url,
                        cache_folder=self.get_aux_cache_dir(make_dir=True),
                        expected_checksum=expected_checksum)
                    self.read_yaml_file(file_path, *args, **kwargs)
                    file_was_downloaded_and_read = True
                except (FileNotFoundError, urllib.error.URLError):
                    # a missing include is fatal unless the caller asked to ignore it
                    ignore = kwargs.get('ignore_if_not_exist', False)
                    if ignore:
                        self.progress(
                            f"'ignore_if_not_exist' specified, ignoring FileNotFoundError for {resolved_file_url}"
                        )
                    else:
                        raise

                # optionally copy the downloaded file to extra destinations,
                # skipping destinations that already hold the expected checksum
                if "copy" in i_node and file_was_downloaded_and_read:
                    self.batch_accum.set_current_section('post')
                    for copy_destination in i_node["copy"]:
                        need_to_copy = True
                        destination_file_resolved_path = utils.ExpandAndResolvePath(
                            config_vars.resolve_str(copy_destination.value))
                        if destination_file_resolved_path.is_file(
                        ) and expected_checksum is not None:
                            checksums_match = utils.check_file_checksum(
                                file_path=destination_file_resolved_path,
                                expected_checksum=expected_checksum)
                            need_to_copy = not checksums_match
                        if need_to_copy:
                            self.batch_accum += MakeDir(
                                destination_file_resolved_path.parent,
                                chowner=True)
                            self.batch_accum += CopyFileToFile(
                                file_path,
                                destination_file_resolved_path,
                                hard_links=False,
                                copy_owner=True)
Ejemplo n.º 4
0
    def read_include_node(self, i_node, *args, **kwargs):
        """ Recursively process an 'include' yaml node.

            A scalar node is a path/url of another yaml file to read; a
            sequence is processed element by element; a mapping with a "url"
            key is downloaded to the cache dir (optionally checksum and
            signature verified) and read, after which any "copy" destinations
            are queued as batch copy commands.

            :raises FileNotFoundError, urllib.error.URLError: if the url cannot
                be fetched and kwargs['ignore_if_not_exist'] is not set
        """
        if i_node.isScalar():
            resolved_file_name = var_stack.ResolveStrToStr(i_node.value)
            self.read_yaml_file(resolved_file_name, *args, **kwargs)
        elif i_node.isSequence():
            # a list of includes - handle each element recursively
            for sub_i_node in i_node:
                self.read_include_node(sub_i_node, *args, **kwargs)
        elif i_node.isMapping():
            if "url" in i_node:
                cached_files_dir = self.get_default_sync_dir(continue_dir="cache", make_dir=True)
                resolved_file_url = var_stack.ResolveStrToStr(i_node["url"].value)
                cached_file_path = None
                expected_checksum = None
                if "checksum" in i_node:
                    expected_checksum = var_stack.ResolveStrToStr(i_node["checksum"].value)
                    # the cached copy is named after its checksum
                    cached_file_path = os.path.join(cached_files_dir, expected_checksum)

                expected_signature = None
                public_key_text = None
                if "sig" in i_node:
                    expected_signature = var_stack.ResolveStrToStr(i_node["sig"].value)
                    public_key_text = self.provision_public_key_text()

                if expected_checksum is None:
                    # no checksum -> nothing to cache against, read the url directly
                    self.read_yaml_file(resolved_file_url, *args, **kwargs)
                    cached_file_path = resolved_file_url
                else:
                    try:
                        utils.download_from_file_or_url(resolved_file_url,cached_file_path,
                                                  connectionBase.translate_url, cache=True,
                                                  public_key=public_key_text,
                                                  textual_sig=expected_signature,
                                                  expected_checksum=expected_checksum)
                        self.read_yaml_file(cached_file_path, *args, **kwargs)
                    except (FileNotFoundError, urllib.error.URLError):
                        # a missing include is fatal unless the caller asked to ignore it
                        ignore = kwargs.get('ignore_if_not_exist', False)
                        if ignore:
                            print("'ignore_if_not_exist' specified, ignoring FileNotFoundError for", resolved_file_url)
                        else:
                            raise

                # optionally copy the downloaded file to extra destinations,
                # skipping destinations that already hold the expected checksum
                if "copy" in i_node:
                    self.batch_accum.set_current_section('post')
                    for copy_destination in i_node["copy"]:
                        need_to_copy = True
                        destination_file_resolved_path = var_stack.ResolveStrToStr(copy_destination.value)
                        if os.path.isfile(destination_file_resolved_path) and expected_checksum is not None:
                            checksums_match = utils.check_file_checksum(file_path=destination_file_resolved_path, expected_checksum=expected_checksum)
                            need_to_copy = not checksums_match
                        if need_to_copy:
                            destination_folder, destination_file_name = os.path.split(copy_destination.value)
                            self.batch_accum += self.platform_helper.mkdir(destination_folder)
                            self.batch_accum += self.platform_helper.copy_tool.copy_file_to_file(cached_file_path,
                                                                                                 var_stack.ResolveStrToStr(copy_destination.value),
                                                                                                 link_dest=True)
Ejemplo n.º 5
0
 def can_copy_be_avoided(self, dir_item, source_items):
     """ Return True when copying dir_item can be skipped: not in repair mode,
         an Info.xml (or Info.plist) item exists among source_items, and the
         already-installed copy of that file matches its expected checksum.
     """
     can_avoid = False
     if "__REPAIR_INSTALLED_ITEMS__" not in self.main_install_targets:
         # look for Info.xml as first choice, Info.plist as second choice
         info_item = None
         for wanted_leaf in ("Info.xml", "Info.plist"):
             info_item = next((an_item for an_item in source_items if an_item.leaf == wanted_leaf), None)
             if info_item:
                 break
         if info_item:  # without an info item copy cannot be avoided
             destination_folder = var_stack.ResolveStrToStr(self.current_destination_folder)
             _, dir_item_leaf = os.path.split(var_stack.ResolveStrToStr(dir_item.path))
             info_relative_path = info_item.path[len(dir_item.path) + 1:]
             installed_info_path = os.path.join(destination_folder, dir_item_leaf, info_relative_path)
             can_avoid = utils.check_file_checksum(installed_info_path, info_item.checksum)
     return can_avoid
Ejemplo n.º 6
0
 def __call__(self, *args, **kwargs):
     """ Download self.url to self.path and verify the result's checksum.

         Expects kwargs['session'] to provide the requests-style session.
         :raises ValueError: if the downloaded file does not match self.checksum
     """
     PythonBatchCommandBase.__call__(self, *args, **kwargs)
     session = kwargs['session']
     with MakeDir(self.path.parent, report_own_progress=False) as dir_maker:
         dir_maker()
     timeout_seconds = int(config_vars.get("CURL_MAX_TIME", 480))
     read_data = session.get(self.url, timeout=timeout_seconds)
     read_data.raise_for_status()  # must raise in case of an error. Server might return json/xml with error details, we do not want that
     # bug fix: open the target file only after the request succeeded;
     # previously open(..., "wb") ran before the GET, truncating any existing
     # file even when the download subsequently failed
     with open(self.path, "wb") as fo:
         fo.write(read_data.content)
     checksum_ok = utils.check_file_checksum(self.path, self.checksum)
     if not checksum_ok:
         raise ValueError(f"bad checksum for {self.path} even after re-download")
Ejemplo n.º 7
0
def unwtar_with_checks(tar_files, target_folder, tar_real_name):
    """ Extract a (possibly multi-part) wtar archive into target_folder,
        skipping members whose on-disk copy already matches the checksum
        recorded in the archive's pax headers. Prints a summary when done.
    """
    with utils.Timer_CM("unwtar_with_checks") as timer:
        already_ok_count = 0
        extracted_count = 0
        with MultiFileReader("br", tar_files) as joined_parts:
            with tarfile.open(fileobj=joined_parts) as tar:
                checksums_by_path = tar.pax_headers
                for member in tar.getmembers():
                    on_disk_path = os.path.join(target_folder, member.path)
                    if utils.check_file_checksum(on_disk_path, checksums_by_path[member.path]):
                        # on-disk copy already matches - no need to extract
                        already_ok_count += 1
                    else:
                        extracted_count += 1
                        tar.extract(member, target_folder)
    print("   ", "unwtar_with_checks:", tar_files[0], extracted_count, "files unwtarred,", already_ok_count, "not unwtarred")
Ejemplo n.º 8
0
def read_file_or_url(in_file_or_url,
                     config_vars,
                     path_searcher=None,
                     encoding='utf-8',
                     save_to_path=None,
                     checksum=None,
                     connection_obj=None):
    """ Read and return the contents of a local file or a remote URL.

        If save_to_path already holds data matching checksum, downloading is
        skipped and the cached file is read instead. After a successful read,
        the contents are also cached in save_to_path, if given.

        :param in_file_or_url: local path or URL to read
        :param config_vars: configuration variables, passed through on recursion
        :param path_searcher: optional object whose find_file() locates local files
        :param encoding: text encoding for local files; None means read as binary
        :param save_to_path: optional cache path for the contents
        :param checksum: expected checksum of save_to_path
        :param connection_obj: required for URLs; provides get_session()
        :return: tuple (contents, actual path the contents came from)
        :raises FileNotFoundError: when a local file cannot be located
    """
    need_to_download = not utils.check_file_checksum(save_to_path, checksum)
    if not need_to_download:
        # if save_to_path contains the correct data just read it by recursively
        # calling read_file_or_url
        return read_file_or_url(save_to_path, config_vars, encoding=encoding)
    match = protocol_header_re.match(os.fspath(in_file_or_url))
    actual_file_path = in_file_or_url
    if not match:  # it's a local file
        if path_searcher is not None:
            actual_file_path = path_searcher.find_file(actual_file_path)
        if actual_file_path:
            if 'Win' in utils.get_current_os_names():
                actual_file_path = os.path.abspath(actual_file_path)
            else:
                actual_file_path = os.path.realpath(actual_file_path)
        else:
            raise FileNotFoundError(
                f"Could not locate local file {in_file_or_url}")
        if encoding is None:
            read_mod = "rb"
        else:
            read_mod = "r"
        # bug fix: read_mod was computed but never used - open() always used
        # mode "r", so encoding=None failed to read the file as binary
        with open(actual_file_path, read_mod, encoding=encoding) as rdf:
            buffer = rdf.read()
    else:
        assert connection_obj, "no connection_obj given"
        session = connection_obj.get_session(in_file_or_url)
        response = session.get(in_file_or_url, timeout=(33.05, 180.05))
        response.raise_for_status()
        buffer = response.text
    buffer = utils.unicodify(buffer)  # make sure text is unicode
    if save_to_path and in_file_or_url != save_to_path:
        with open(save_to_path, "w") as wfd:
            utils.chown_chmod_on_fd(wfd)
            wfd.write(buffer)
    return buffer, actual_file_path
Ejemplo n.º 9
0
def download_and_cache_file_or_url(in_url,
                                   config_vars,
                                   cache_folder: Path,
                                   translate_url_callback=None,
                                   expected_checksum=None):
    """ Download in_url into cache_folder, returning the cached file's path.

        When expected_checksum is given and the cached copy already matches it,
        the download is skipped entirely; otherwise the file is (re)downloaded.

        :param in_url: url (or local path) to fetch
        :param config_vars: configuration variables passed to read_from_file_or_url
        :param cache_folder: directory to cache the download in; created if missing
        :param translate_url_callback: optional callback passed through to read_from_file_or_url
        :param expected_checksum: optional checksum; also used as the cached file's name
        :return: path of the downloaded (or already cached) file
    """

    # a plain file occupying the cache folder's path is bogus - clear it away
    if cache_folder.is_file():
        safe_remove_file(cache_folder)
    cache_folder.mkdir(parents=True, exist_ok=True)

    # cache under the checksum when we have one, otherwise under the url's leaf name
    url_file_name = last_url_item(in_url)
    name_in_cache = expected_checksum or url_file_name
    cached_file_path = cache_folder.joinpath(name_in_cache)

    if expected_checksum is None:
        # without a checksum no cached copy can be trusted - force a download
        safe_remove_file(cached_file_path)

    if cached_file_path.is_file():
        # a cached copy exists - keep it only if its checksum matches
        if not utils.check_file_checksum(cached_file_path, expected_checksum):
            safe_remove_file(cached_file_path)

    if not cached_file_path.is_file():
        # nothing usable in the cache - fetch and store the raw bytes
        downloaded_bytes = read_from_file_or_url(in_url,
                                                 config_vars,
                                                 translate_url_callback,
                                                 expected_checksum,
                                                 encoding=None)
        if downloaded_bytes:
            with open(cached_file_path, "wb") as wfd:
                chown_chmod_on_fd(wfd)
                wfd.write(downloaded_bytes)
    return cached_file_path
Ejemplo n.º 10
0
    def set_sync_locations_for_active_items(self):
        """ Compute and store download_path/download_root for every file item
            belonging to the active installation items, then persist them via
            info_map_table.update_downloads().

            Direct-sync sources download straight into their install folder;
            others download into $(LOCAL_REPO_SYNC_DIR). For unchanged
            direct-sync dirs (Info.xml checksum matches) downloading is
            skipped entirely.
        """
        # get_sync_folders_and_sources_for_active_iids returns: [(iid, direct_sync_indicator, source, source_tag, install_folder),...]
        # direct_sync_indicator will be None unless the items has "direct_sync" section in index.yaml
        # source is the relative path as it appears in index.yaml
        # adjusted source is the source prefixed with $(SOURCE_PREFIX) -- it needed
        # source_tag is one of  '!dir', '!dir_cont', '!file'
        # install_folder is where the sources should be copied to OR, in case of direct syn where they should be synced to
        # install_folder will be None for those items that require only sync not copy (such as Icons)
        #
        # for each file item in the source this function will set the full path where to download the file: item.download_path
        # and the top folder common to all items in a single source: item.download_root
        sync_and_source = self.items_table.get_sync_folders_and_sources_for_active_iids(
        )

        items_to_update = list()
        local_repo_sync_dir = os.fspath(config_vars["LOCAL_REPO_SYNC_DIR"])
        config_vars.setdefault("ALL_SYNC_DIRS", local_repo_sync_dir)
        for iid, direct_sync_indicator, source, source_tag, install_folder in sync_and_source:
            direct_sync = self.get_direct_sync_status_from_indicator(
                direct_sync_indicator)
            resolved_source_parts = source.split("/")
            if install_folder:
                resolved_install_folder = config_vars.resolve_str(
                    install_folder)
            else:
                resolved_install_folder = install_folder

            if source_tag in ('!dir', '!dir_cont'):
                if direct_sync:
                    # for direct-sync source, if one of the sources is Info.xml and it exists on disk AND source & file
                    # have the same checksum, then no sync is needed at all. All the above is not relevant in repair mode.
                    need_to_sync = True
                    if not self.update_mode:
                        info_xml_item = self.info_map_table.get_file_item(
                            "/".join((source, "Info.xml")))
                        if info_xml_item:
                            info_xml_of_target = config_vars.resolve_str(
                                "/".join(
                                    (resolved_install_folder,
                                     resolved_source_parts[-1], "Info.xml")))
                            need_to_sync = not utils.check_file_checksum(
                                info_xml_of_target, info_xml_item.checksum)
                    if need_to_sync:
                        config_vars["ALL_SYNC_DIRS"].append(
                            resolved_install_folder)
                        item_paths = self.info_map_table.get_recursive_paths_in_dir(
                            dir_path=source, what="any")
                        self.progress(
                            f"mark for download {len(item_paths)} files of {iid}/{source}"
                        )
                        if source_tag == '!dir':
                            # '!dir': the source dir itself is recreated under install_folder
                            source_parent = "/".join(
                                resolved_source_parts[:-1])
                            for item in item_paths:
                                items_to_update.append({
                                    "_id":
                                    item['_id'],
                                    "download_path":
                                    config_vars.resolve_str("/".join(
                                        (resolved_install_folder,
                                         item['path'][len(source_parent) +
                                                      1:]))),
                                    "download_root":
                                    config_vars.resolve_str("/".join(
                                        (resolved_install_folder,
                                         resolved_source_parts[-1])))
                                })
                        else:  # !dir_cont
                            # '!dir_cont': only the dir's contents go under install_folder
                            source_parent = source
                            for item in item_paths:
                                items_to_update.append({
                                    "_id":
                                    item['_id'],
                                    "download_path":
                                    config_vars.resolve_str("/".join(
                                        (resolved_install_folder,
                                         item['path'][len(source_parent) +
                                                      1:]))),
                                    "download_root":
                                    resolved_install_folder
                                })
                    else:
                        num_ignored_files = self.info_map_table.ignore_file_paths_of_dir(
                            dir_path=source)
                        if num_ignored_files < 1:
                            num_ignored_files = ""  # sqlite curs.rowcount does not always returns the number of effected rows
                        self.progress(
                            f"avoid download {num_ignored_files} files of {iid}, Info.xml has not changed"
                        )

                else:
                    # not direct-sync: download into the local repo sync dir
                    item_paths = self.info_map_table.get_recursive_paths_in_dir(
                        dir_path=source)
                    self.progress(
                        f"mark for download {len(item_paths)} files of {iid}/{source}"
                    )
                    for item in item_paths:
                        items_to_update.append({
                            "_id":
                            item['_id'],
                            "download_path":
                            config_vars.resolve_str("/".join(
                                (local_repo_sync_dir, item['path']))),
                            "download_root":
                            None
                        })
            elif source_tag == '!file':
                # if the file was wtarred and split it would have multiple items
                items_for_file = self.info_map_table.get_required_paths_for_file(
                    source)
                self.progress(
                    f"mark for download {len(items_for_file)} files of {iid}/{source}"
                )
                if direct_sync:
                    config_vars["ALL_SYNC_DIRS"].append(
                        resolved_install_folder)
                    for item in items_for_file:
                        items_to_update.append({
                            "_id":
                            item['_id'],
                            "download_path":
                            config_vars.resolve_str("/".join(
                                (resolved_install_folder, item['leaf']))),
                            "download_root":
                            config_vars.resolve_str(item.download_path)
                        })
                else:
                    for item in items_for_file:
                        items_to_update.append(
                            {
                                "_id":
                                item['_id'],
                                "download_path":
                                config_vars.resolve_str("/".join(
                                    (local_repo_sync_dir, item['path']))),
                                "download_root":
                                None
                            }
                        )  # no need to set item.download_root here - it will not be used

        self.info_map_table.update_downloads(items_to_update)
Ejemplo n.º 11
0
    def set_sync_locations_for_active_items(self):
        """ Compute and store download_path/download_root for every file item
            belonging to the active installation items, then persist them via
            info_map_table.update_downloads().

            Direct-sync sources download straight into their install folder;
            others download into $(LOCAL_REPO_SYNC_DIR). For unchanged
            direct-sync dirs (Info.xml checksum matches) downloading is
            skipped entirely.
        """
        # get_sync_folders_and_sources_for_active_iids returns: [(iid, direct_sync_indicator, source, source_tag, install_folder),...]
        # direct_sync_indicator will be None unless the items has "direct_sync" section in index.yaml
        # source is the relative path as it appears in index.yaml
        # adjusted source is the source prefixed with $(SOURCE_PREFIX) -- it needed
        # source_tag is one of  '!dir', '!dir_cont', '!file'
        # install_folder is where the sources should be copied to OR, in case of direct syn where they should be synced to
        # install_folder will be None for those items that require only sync not copy (such as Icons)
        #
        # for each file item in the source this function will set the full path where to download the file: item.download_path
        # and the top folder common to all items in a single source: item.download_root
        sync_and_source = self.items_table.get_sync_folders_and_sources_for_active_iids()

        items_to_update = list()
        for iid, direct_sync_indicator, source, source_tag, install_folder in sync_and_source:
            direct_sync = self.get_direct_sync_status_from_indicator(direct_sync_indicator)
            resolved_source_parts = source.split("/")
            if install_folder:
                resolved_install_folder = var_stack.ResolveStrToStr(install_folder)
            else:
                resolved_install_folder = install_folder
            local_repo_sync_dir = var_stack.ResolveVarToStr("LOCAL_REPO_SYNC_DIR")

            if source_tag in ('!dir', '!dir_cont'):
                if direct_sync:
                    # for direct-sync source, if one of the sources is Info.xml and it exists on disk AND source & file
                    # have the same checksum, then no sync is needed at all. All the above is not relevant in repair mode.
                    need_to_sync = True
                    if "__REPAIR_INSTALLED_ITEMS__" not in self.main_install_targets:
                        info_xml_item = self.info_map_table.get_file_item("/".join((source, "Info.xml")))
                        if info_xml_item:
                            info_xml_of_target = var_stack.ResolveStrToStr("/".join((resolved_install_folder, resolved_source_parts[-1], "Info.xml")))
                            need_to_sync = not utils.check_file_checksum(info_xml_of_target, info_xml_item.checksum)
                    if need_to_sync:
                        item_paths = self.info_map_table.get_file_paths_of_dir(dir_path=source)
                        if source_tag == '!dir':
                            # '!dir': the source dir itself is recreated under install_folder
                            source_parent = "/".join(resolved_source_parts[:-1])
                            for item in item_paths:
                                items_to_update.append({"_id": item['_id'],
                                                        "download_path": var_stack.ResolveStrToStr("/".join((resolved_install_folder, item['path'][len(source_parent)+1:]))),
                                                        "download_root": var_stack.ResolveStrToStr("/".join((resolved_install_folder, resolved_source_parts[-1])))})
                        else:  # !dir_cont
                            # '!dir_cont': only the dir's contents go under install_folder
                            source_parent = source
                            for item in item_paths:
                                items_to_update.append({"_id": item['_id'],
                                                        "download_path": var_stack.ResolveStrToStr("/".join((resolved_install_folder, item['path'][len(source_parent)+1:]))),
                                                        "download_root": resolved_install_folder})
                    else:
                        num_ignored_files = self.info_map_table.ignore_file_paths_of_dir(dir_path=source)
                        if num_ignored_files < 1:
                            num_ignored_files = ""  # sqlite curs.rowcount does not always returns the number of effected rows
                        self.progress("avoid download {} files of {}, Info.xml has not changed".format(num_ignored_files, iid))

                else:
                    # not direct-sync: download into the local repo sync dir
                    item_paths = self.info_map_table.get_file_paths_of_dir(dir_path=source)
                    for item in item_paths:
                        items_to_update.append({"_id": item['_id'],
                                                "download_path": var_stack.ResolveStrToStr("/".join((local_repo_sync_dir, item['path']))),
                                                "download_root": None})
            elif source_tag == '!file':
                # if the file was wtarred and split it would have multiple items
                items_for_file = self.info_map_table.get_required_paths_for_file(source)
                if direct_sync:
                    for item in items_for_file:
                        items_to_update.append({"_id": item['_id'],
                                                "download_path": var_stack.ResolveStrToStr("/".join((resolved_install_folder, item['leaf']))),
                                                "download_root": var_stack.ResolveStrToStr(item.download_path)})
                else:
                    for item in items_for_file:
                        items_to_update.append({"_id": item['_id'],
                                                "download_path": var_stack.ResolveStrToStr("/".join((local_repo_sync_dir, item['path']))),
                                                "download_root": None})  # no need to set item.download_root here - it will not be used

        self.info_map_table.update_downloads(items_to_update)