Exemple #1
0
    def test_eta(self):
        """
        Check that eta round-trips 100 and None, that assigning a string
        raises TypeError and that assigning -100 raises ValueError.
        """
        model_file = ModelFile("test", False)

        # accepted values must be readable back unchanged
        for accepted in (100, None):
            model_file.eta = accepted
            self.assertEqual(accepted, model_file.eta)

        # a value of the wrong type is rejected
        with self.assertRaises(TypeError):
            model_file.eta = "BadValue"
        # a negative value is rejected
        with self.assertRaises(ValueError):
            model_file.eta = -100
Exemple #2
0
 def test_eta(self):
     """
     Check that the serialized model reports each file's eta
     (None, 0 and 100) in the order the files were passed in.
     """
     serializer = SerializeModel()

     # (name, is_dir, eta) for every file in the model
     specs = [("a", True, None), ("b", False, 0), ("c", True, 100)]
     model_files = []
     for name, is_dir, eta in specs:
         model_file = ModelFile(name, is_dir)
         model_file.eta = eta
         model_files.append(model_file)

     event = parse_stream(serializer.model(model_files))
     payload = json.loads(event["data"])

     self.assertEqual(3, len(payload))
     for index, (_name, _is_dir, eta) in enumerate(specs):
         self.assertEqual(eta, payload[index]["eta"])
Exemple #3
0
 def test_full_path(self):
     """
     Check that every serialized node, at any depth of the tree,
     carries the "/"-joined path from its root as "full_path".
     """
     serializer = SerializeModel()

     # root "a": no children
     root_a = ModelFile("a", True)

     # root "b": two direct children
     root_b = ModelFile("b", True)
     root_b.add_child(ModelFile("ba", False))
     root_b.add_child(ModelFile("bb", True))

     # root "c": a nested directory plus a plain file
     root_c = ModelFile("c", True)
     nested_dir = ModelFile("ca", True)
     nested_dir.add_child(ModelFile("caa", False))
     nested_dir.add_child(ModelFile("cab", False))
     root_c.add_child(nested_dir)
     plain_file = ModelFile("cb", False)
     root_c.add_child(plain_file)
     root_c.eta = 100

     event = parse_stream(serializer.model([root_a, root_b, root_c]))
     payload = json.loads(event["data"])
     self.assertEqual(3, len(payload))

     # (expected full_path, root index followed by child indices)
     expectations = [
         ("a", (0,)),
         ("b", (1,)),
         ("b/ba", (1, 0)),
         ("b/bb", (1, 1)),
         ("c", (2,)),
         ("c/ca", (2, 0)),
         ("c/ca/caa", (2, 0, 0)),
         ("c/ca/cab", (2, 0, 1)),
         ("c/cb", (2, 1)),
     ]
     for expected_path, (root_index, *child_indices) in expectations:
         # resolve the node lazily so a failure surfaces at the same
         # assertion it would in a hand-written sequence
         node = payload[root_index]
         for child_index in child_indices:
             node = node["children"][child_index]
         self.assertEqual(expected_path, node["full_path"])
Exemple #4
0
            def __fill_model_file(
                    _model_file: ModelFile, _remote: Optional[SystemFile],
                    _local: Optional[SystemFile],
                    _transfer_state: Optional[LftpJobStatus.TransferState]):
                """
                Copy sizes, transfer progress, the extractable flag and
                timestamps onto _model_file from whichever sources are given.

                :param _model_file: file to populate; modified in place, and its
                                    parents are updated for transferred size and
                                    the extractable flag
                :param _remote: remote system file, if any
                :param _local: local system file, if any
                :param _transfer_state: source of speed and eta, if any
                """
                # sizes come straight from the system files
                # Note: lftp's file sizes are no longer used because they
                #       represent the remaining size for resumed downloads
                if _remote:
                    _model_file.remote_size = _remote.size
                if _local:
                    _model_file.local_size = _local.size

                # downloading speed and eta come from the transfer state
                if _transfer_state:
                    _model_file.downloading_speed = _transfer_state.speed
                    _model_file.eta = _transfer_state.eta

                # transferred size is only set when the file or dir exists on both ends
                if _local and _remote:
                    if not _model_file.is_dir:
                        _model_file.transferred_size = min(
                            _local.size, _remote.size)

                        # add this file's contribution to every parent directory
                        _ancestor = _model_file.parent
                        while _ancestor is not None:
                            _ancestor.transferred_size += _model_file.transferred_size
                            _ancestor = _ancestor.parent
                    else:
                        # a dir starts at zero, its child files add to it
                        _model_file.transferred_size = 0

                # an archive file flags itself and all of its parents as extractable
                _is_archive = not _model_file.is_dir and Extract.is_archive_fast(
                    _model_file.name)
                if _is_archive:
                    _node = _model_file
                    while _node is not None:
                        _node.is_extractable = True
                        _node = _node.parent

                # timestamps are copied only when the source reports one
                if _local and _local.timestamp_created:
                    _model_file.local_created_timestamp = _local.timestamp_created
                if _local and _local.timestamp_modified:
                    _model_file.local_modified_timestamp = _local.timestamp_modified
                if _remote and _remote.timestamp_created:
                    _model_file.remote_created_timestamp = _remote.timestamp_created
                if _remote and _remote.timestamp_modified:
                    _model_file.remote_modified_timestamp = _remote.timestamp_modified
Exemple #5
0
    def build_model(self) -> Model:
        """
        Build a new Model from the local files, remote files and lftp statuses.

        One root ModelFile is created for every name found in any of the three
        sources, and its children are built by walking the remote and local
        SystemFile trees together. The root state is resolved in this order:
        Queued/Downloading (from the lftp status), Downloaded, Deleted,
        Extracting and finally Extracted.

        :return: the newly built Model
        :raises ModelError: if no source has an object for a name, if the
                            sources disagree on is_dir, or if an extract status
                            disagrees with the file on is_dir
        """
        # Function-scope import keeps this method self-contained
        # deque provides O(1) pops from the left for the BFS traversals below,
        # whereas list.pop(0) shifts every remaining element
        from collections import deque

        # Defined once, outside the per-file loop: it uses only its arguments,
        # so there is no need to re-create it for every root file
        def __fill_model_file(
                _model_file: ModelFile, _remote: Optional[SystemFile],
                _local: Optional[SystemFile],
                _transfer_state: Optional[LftpJobStatus.TransferState]):
            # set local and remote sizes
            if _remote:
                _model_file.remote_size = _remote.size
            if _local:
                _model_file.local_size = _local.size

            # Note: no longer use lftp's file sizes
            #       they represent remaining size for resumed downloads

            # set the downloading speed and eta
            if _transfer_state:
                _model_file.downloading_speed = _transfer_state.speed
                _model_file.eta = _transfer_state.eta

            # set the transferred size (only if file or dir exists on both ends)
            if _local and _remote:
                if _model_file.is_dir:
                    # dir transferred size is updated by child files
                    _model_file.transferred_size = 0
                else:
                    _model_file.transferred_size = min(
                        _local.size, _remote.size)

                    # also update all parent directories
                    _parent_file = _model_file.parent
                    while _parent_file is not None:
                        _parent_file.transferred_size += _model_file.transferred_size
                        _parent_file = _parent_file.parent

            # set the is_extractable flag
            if not _model_file.is_dir and Extract.is_archive_fast(
                    _model_file.name):
                _model_file.is_extractable = True
                # Also set the flag for all of its parents
                _parent_file = _model_file.parent
                while _parent_file is not None:
                    _parent_file.is_extractable = True
                    _parent_file = _parent_file.parent

        model = Model()
        model.set_base_logger(
            logging.getLogger("dummy"))  # ignore the logs for this temp model
        all_file_names = set().union(self.__local_files.keys(),
                                     self.__remote_files.keys(),
                                     self.__lftp_statuses.keys())
        for name in all_file_names:
            remote = self.__remote_files.get(name, None)
            local = self.__local_files.get(name, None)
            status = self.__lftp_statuses.get(name, None)

            if remote is None and local is None and status is None:
                # this should never happen, but just in case
                raise ModelError("Zero sources have a file object")

            # sanity check between the sources
            # is_dir is taken from the first available source (remote, local, status)
            # and every other available source must agree with it
            is_dir = remote.is_dir if remote else local.is_dir if local else status.type == LftpJobStatus.Type.MIRROR
            if (remote and is_dir != remote.is_dir) or \
               (local and is_dir != local.is_dir) or \
               (status and is_dir != (status.type == LftpJobStatus.Type.MIRROR)):
                raise ModelError("Mismatch in is_dir between sources")

            model_file = ModelFile(name, is_dir)
            # set the file state
            # for now we only set to Queued or Downloading
            # later after all children are built, we can set to Downloaded after performing a check
            if status:
                model_file.state = ModelFile.State.QUEUED if status.state == LftpJobStatus.State.QUEUED \
                                   else ModelFile.State.DOWNLOADING
            # fill the rest
            # the root only receives a transfer state while its lftp job is running
            __fill_model_file(
                model_file, remote, local,
                status.total_transfer_state if status
                and status.state == LftpJobStatus.State.RUNNING else None)

            # Traverse SystemFile children tree in BFS order
            # Store (remote, local, status, model_file) tuple in traversal frontier where remote and local
            # correspond to the same node in both remote and local SystemFile trees, status corresponds
            # to the LFTP status for the entire tree, and model_file corresponds to the generated ModelFile
            # for the pair
            # Note: in this case the frontier contains nodes that have already been processed, it is
            #       merely used for traversing children
            frontier = deque()
            if remote or local:
                frontier.append((remote, local, status, model_file))
            while frontier:
                _remote, _local, _status, _model_file = frontier.popleft()
                _remote_children = {sf.name: sf
                                    for sf in _remote.children
                                    } if _remote else {}
                _local_children = {sf.name: sf
                                   for sf in _local.children
                                   } if _local else {}
                _all_children_names = set().union(_remote_children.keys(),
                                                  _local_children.keys())
                for _child_name in _all_children_names:
                    _remote_child = _remote_children.get(_child_name, None)
                    _local_child = _local_children.get(_child_name, None)
                    _is_dir = _remote_child.is_dir if _remote_child else _local_child.is_dir
                    # sanity check is_dir
                    if (_remote_child and _is_dir != _remote_child.is_dir) or \
                       (_local_child and _is_dir != _local_child.is_dir):
                        raise ModelError(
                            "Mismatch in is_dir between child sources")
                    _child_model_file = ModelFile(_child_name, _is_dir)

                    # add it to the parent right away so we can access the full path
                    _model_file.add_child(_child_model_file)

                    # find the transfer state (if it exists) corresponding to this child
                    # Note: transfer states are in full paths
                    # Note2: transfer states don't include root path
                    _child_status_path = os.path.join(
                        *(_child_model_file.full_path.split(os.sep)[1:]))
                    _child_transfer_state = None
                    if _status:
                        _child_transfer_state = next(
                            (ts for n, ts in
                             _status.get_active_file_transfer_states()
                             if n == _child_status_path), None)
                    # Set the state, first matching criteria below decides state
                    #   child is a directory: Default
                    #   child is active: Downloading
                    #   child local_size >= remote_size: Downloaded
                    #   remote child exists and root is Queued or Downloading: Queued
                    #   Default
                    # Result:
                    #   subdirectories are always Default
                    #   downloading files are Downloading
                    #   finished files are Downloaded
                    #   Queued and Downloading root's unfinished files are Queued
                    #   Local-only files are Default
                    if _is_dir:
                        _child_model_file.state = ModelFile.State.DEFAULT
                    elif _child_transfer_state:
                        _child_model_file.state = ModelFile.State.DOWNLOADING
                    elif _remote_child and _local_child and _local_child.size >= _remote_child.size:
                        _child_model_file.state = ModelFile.State.DOWNLOADED
                    elif _remote_child and model_file.state in (
                            ModelFile.State.QUEUED,
                            ModelFile.State.DOWNLOADING):
                        _child_model_file.state = ModelFile.State.QUEUED
                    else:
                        _child_model_file.state = ModelFile.State.DEFAULT

                    # fill the rest
                    __fill_model_file(_child_model_file, _remote_child,
                                      _local_child, _child_transfer_state)
                    # add child to frontier
                    frontier.append((_remote_child, _local_child, _status,
                                     _child_model_file))

            # estimate the ETA for the root if it's not available
            if model_file.state == ModelFile.State.DOWNLOADING and \
                    model_file.eta is None and \
                    model_file.downloading_speed is not None and \
                    model_file.downloading_speed > 0 and \
                    model_file.transferred_size is not None:
                # First-order estimate
                remaining_size = max(
                    model_file.remote_size - model_file.transferred_size, 0)
                model_file.eta = int(
                    math.ceil(remaining_size / model_file.downloading_speed))

            # now we can determine if root is Downloaded
            # root is Downloaded if all child remote files are Downloaded
            # again we use BFS to traverse
            if model_file.state == ModelFile.State.DEFAULT:
                if not model_file.is_dir and \
                        model_file.local_size is not None and \
                        model_file.remote_size is not None and \
                        model_file.local_size >= model_file.remote_size:
                    # root is a finished single file
                    model_file.state = ModelFile.State.DOWNLOADED
                elif model_file.is_dir and model_file.remote_size is not None:
                    # root is a directory that also exists remotely
                    # check all the children
                    all_downloaded = True
                    frontier = deque(model_file.get_children())
                    while frontier:
                        _child_file = frontier.popleft()
                        if not _child_file.is_dir and \
                                _child_file.remote_size is not None and \
                                _child_file.state != ModelFile.State.DOWNLOADED:
                            all_downloaded = False
                            break
                        frontier.extend(_child_file.get_children())
                    if all_downloaded:
                        model_file.state = ModelFile.State.DOWNLOADED

            # next we determine if root was Deleted
            # root is Deleted if it does not exist locally, but was downloaded in the past
            if model_file.state == ModelFile.State.DEFAULT and \
                    model_file.local_size is None and \
                    model_file.name in self.__downloaded_files:
                model_file.state = ModelFile.State.DELETED

            # next we check if root is Extracting
            # root is Extracting if it's part of an extract status, in an expected state,
            # and exists locally
            # if root is NOT in an expected state, then ignore the extract status
            # and report a warning message, as this shouldn't be happening
            if model_file.name in self.__extract_statuses:
                extract_status = self.__extract_statuses[model_file.name]
                if model_file.is_dir != extract_status.is_dir:
                    raise ModelError(
                        "Mismatch in is_dir between file and extract status")
                if model_file.state in (
                        ModelFile.State.DEFAULT, ModelFile.State.DOWNLOADED
                ) and model_file.local_size is not None:
                    model_file.state = ModelFile.State.EXTRACTING
                else:
                    if model_file.local_size is None:
                        self.logger.warning(
                            "File {} has extract status but doesn't exist locally!"
                            .format(model_file.name))
                    else:
                        self.logger.warning(
                            "File {} has extract status but is in state {}".
                            format(model_file.name, str(model_file.state)))

            # next we check if root is Extracted
            # root is Extracted if it is in Downloaded state and in extracted files list
            # Note: Default files aren't marked extracted because they can still be queued
            #       for download, and it doesn't make sense to queue after extracting
            #       If a Default file is extracted, it will return back to the Default state
            if model_file.name in self.__extracted_files and model_file.state == ModelFile.State.DOWNLOADED:
                model_file.state = ModelFile.State.EXTRACTED

            model.add_file(model_file)

        return model