Beispiel #1
0
class Site(object):

    def __init__(self, address, allow_create=True):
        """Set up the site state, its storage, settings and auth keys.

        address: Site address; every non-alphanumeric character is stripped from it.
        allow_create: Passed through to SiteStorage.
        """
        self.address = re.sub("[^A-Za-z0-9]", "", address)  # Keep only alphanumeric characters
        self.address_short = "%s..%s" % (self.address[:6], self.address[-4:])  # Short address for logging
        self.log = logging.getLogger("Site:%s" % self.address_short)
        self.addEventListeners()

        self.content = None  # Load content.json
        self.peers = {}  # Key: ip:port, Value: Peer.Peer
        self.peer_blacklist = SiteManager.peer_blacklist  # Ignore these peers (eg. myself)
        self.time_announce = 0  # Last announce time to tracker
        self.last_tracker_id = random.randint(0, 10)  # Last announced tracker id
        self.worker_manager = WorkerManager(self)  # Handle site download from other peers
        self.bad_files = {}  # SHA check failed files, need to redownload {"inner.content": 1} (key: file, value: failed accept)
        self.content_updated = None  # Content.js update time
        self.notifications = []  # Pending notifications displayed once on page load [error|ok|info, message, timeout]
        self.page_requested = False  # Page viewed in browser

        self.storage = SiteStorage(self, allow_create=allow_create)  # Save and load site files
        self.loadSettings()  # Load settings from sites.json
        self.content_manager = ContentManager(self)  # Load contents

        # Use the global file server of the "main" module if it is loaded and has one, else None
        self.connection_server = getattr(sys.modules.get("main"), "file_server", None)

        # NOTE(review): both generated keys are written to the debug log in clear text
        new_key_generated = False
        if not self.settings.get("auth_key"):  # To auth user in site (Obsolete, will be removed)
            self.settings["auth_key"] = CryptHash.random()
            self.log.debug("New auth key: %s" % self.settings["auth_key"])
            new_key_generated = True

        if not self.settings.get("wrapper_key"):  # To auth websocket permissions
            self.settings["wrapper_key"] = CryptHash.random()
            self.log.debug("New wrapper key: %s" % self.settings["wrapper_key"])
            new_key_generated = True

        if new_key_generated:
            self.saveSettings()  # Persist once, even when both keys were created

        self.websockets = []  # Active site websocket connections

    def __str__(self):
        return "Site %s" % self.address_short

    def __repr__(self):
        return "<%s>" % self.__str__()

    # Load site settings from data/sites.json
    def loadSettings(self):
        """Load this site's settings from <data_dir>/sites.json into self.settings.

        Falls back to a default settings dict when the address has no entry in
        the file, and adds the "ADMIN" permission when this site is the
        configured homepage.
        """
        # Use a context manager so the file handle is closed right after parsing
        with open("%s/sites.json" % config.data_dir) as sites_file:
            sites_settings = json.load(sites_file)

        if self.address in sites_settings:
            self.settings = sites_settings[self.address]
        else:
            self.settings = {"own": False, "serving": True, "permissions": [], "added": int(time.time())}  # Default

        # Add admin permissions to homepage
        if self.address == config.homepage and "ADMIN" not in self.settings["permissions"]:
            self.settings["permissions"].append("ADMIN")

    # Save site settings to data/sites.json
    def saveSettings(self):
        """Store self.settings under this site's address in <data_dir>/sites.json.

        The whole file is re-read, the entry for this address is replaced and
        the result is written back through helper.atomicWrite.
        """
        sites_path = "%s/sites.json" % config.data_dir
        # Use a context manager so the read handle is closed before the file is rewritten
        with open(sites_path) as sites_file:
            sites_settings = json.load(sites_file)
        sites_settings[self.address] = self.settings
        helper.atomicWrite(sites_path, json.dumps(sites_settings, indent=2, sort_keys=True))

    # Max site size in MB
    def getSizeLimit(self):
        """Return the site's "size_limit" setting, or config.size_limit when it is not set."""
        default_limit = config.size_limit
        return self.settings.get("size_limit", default_limit)

    # Next size limit based on current size
    def getNextSizeLimit(self):
        size_limits = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000, 100000]
        size = self.settings.get("size", 0)
        for size_limit in size_limits:
            if size * 1.2 < size_limit * 1024 * 1024:
                return size_limit
        return 999999

    # Download all file from content.json
    def downloadContent(self, inner_path, download_files=True, peer=None, check_modifications=False):
        """Fetch a content.json, then the files and includes it lists.

        inner_path: Path of the content.json inside the site.
        download_files: Also start downloading the entries under "files" (and
            under "files_optional" when the "autodownloadoptional" setting is on).
        peer: Passed on to needFile for the listed files and to the include downloads.
        check_modifications: Run checkModifications(0) after the includes are done,
            or register it for the next onFileDone event if this download failed.

        Return: False if the content.json could not be fetched, True otherwise.
        """
        started = time.time()
        self.log.debug("Downloading %s..." % inner_path)
        found = self.needFile(inner_path, update=self.bad_files.get(inner_path))
        content_inner_dir = helper.getDirname(inner_path)
        if not found:
            self.log.debug("Download %s failed, check_modifications: %s" % (inner_path, check_modifications))
            if check_modifications:
                # Download failed now, but check modifications if a file succeeds later
                self.onFileDone.once(lambda file_name: self.checkModifications(0), "check_modifications")
            return False  # Could not download content.json

        self.log.debug("Got %s" % inner_path)
        changed, deleted = self.content_manager.loadContent(inner_path, load_includes=False)

        file_threads = []  # Events of the file downloads started below

        def startDownloads(section):
            # Start a non-blocking download for every path listed under the given content.json key
            for file_relative_path in self.content_manager.contents[inner_path].get(section, {}).keys():
                file_inner_path = content_inner_dir + file_relative_path
                res = self.needFile(
                    file_inner_path, blocking=False, update=self.bad_files.get(file_inner_path), peer=peer
                )
                if not (res is True or res is False):  # Need downloading and file is allowed
                    file_threads.append(res)  # Keep the event so it can be waited for

        if download_files:
            startDownloads("files")

            # Optional files
            if inner_path == "content.json":
                gevent.spawn(self.updateHashfield)

            if self.settings.get("autodownloadoptional"):
                startDownloads("files_optional")

        # Download every include in its own greenlet and wait for all of them
        include_threads = [
            gevent.spawn(
                self.downloadContent, content_inner_dir + file_relative_path,
                download_files=download_files, peer=peer
            )
            for file_relative_path in self.content_manager.contents[inner_path].get("includes", {}).keys()
        ]

        self.log.debug("%s: Downloading %s includes..." % (inner_path, len(include_threads)))
        gevent.joinall(include_threads)
        self.log.debug("%s: Includes download ended" % inner_path)

        if check_modifications:  # Check if every file is up-to-date
            self.checkModifications(0)

        self.log.debug("%s: Downloading %s files, changed: %s..." % (inner_path, len(file_threads), len(changed)))
        gevent.joinall(file_threads)
        self.log.debug("%s: DownloadContent ended in %.2fs" % (inner_path, time.time() - started))

        if not self.worker_manager.tasks:
            self.onComplete()  # No more tasks: trigger site complete

        return True

    # Return bad files with less than 3 retry
    def getReachableBadFiles(self):
        if not self.bad_files:
            return False
        return [bad_file for bad_file, retry in self.bad_files.iteritems() if retry < 3]

    # Retry download bad files
    def retryBadFiles(self, force=False):
        for bad_file, tries in self.bad_files.items():
            if force or random.randint(0, min(40, tries)) < 4:  # Larger number tries = less likely to check every 15min
                self.needFile(bad_file, update=True, blocking=False)

    # Download all files of the site
    @util.Noparallel(blocking=False)
    def download(self, check_size=False, blind_includes=False):
        """Announce the site, download content.json with its files and retry bad files.

        check_size: First fetch content.json without its files and give up
            (returning False) when that fails.
        blind_includes: Passed to downloadContent as check_modifications.

        Return: The result of the full downloadContent("content.json") call,
        or False when the check_size pre-download failed.
        """
        log_values = (self.bad_files, check_size, blind_includes)
        self.log.debug("Start downloading, bad_files: %s, check_size: %s, blind_includes: %s" % log_values)
        gevent.spawn(self.announce)

        # Check the size first: only the content.json files are downloaded here
        if check_size and not self.downloadContent("content.json", download_files=False):
            return False  # Cant download content.jsons or size is not fits

        # Download everything
        valid = self.downloadContent("content.json", check_modifications=blind_includes)
        self.retryBadFiles(force=True)
        return valid

    # Update worker, try to find client that supports listModifications command
    def updater(self, peers_try, queried, since):
        while 1:
            if not peers_try or len(queried) >= 3:  # Stop after 3 successful query
                break
            peer = peers_try.pop(0)
            if not peer.connection and len(queried) < 2:
                peer.connect()  # Only open new connection if less than 2 queried already
            if not peer.connection or peer.connection.handshake.get("rev", 0) < 126:
                continue  # Not compatible
            res = peer.listModified(since)
            if not res or "modified_files" not in res:
                continue  # Failed query

            queried.append(peer)
            for inner_path, modified in res["modified_files"].iteritems():  # Check if the peer has newer files than we
                content = self.content_manager.contents.get(inner_path)
                if (not content or modified > content["modified"]) and inner_path not in self.bad_files:
                    self.log.debug("New modified file from %s: %s" % (peer, inner_path))
                    if inner_path != "content.json" and self.content_manager.getRules(inner_path) == False:
                        self.log.debug("Banned user %s: %s, skipping." % (peer, inner_path))
                        continue
                    # We dont have this file or we have older
                    self.bad_files[inner_path] = self.bad_files.get(inner_path, 0) + 1  # Mark as bad file
                    gevent.spawn(self.downloadContent, inner_path)  # Download the content.json + the changed files

    # Check modified content.json files from peers and add modified files to bad_files
    # Return: Successfully queried peers [Peer, Peer...]
    def checkModifications(self, since=None):
        """Ask peers which content.json files changed and let updater workers handle them.

        since: Time to query modifications from. When None, one day before the
            "modified" setting is used (0 if that setting is missing).

        Return: Successfully queried peers [Peer, Peer...]
        """
        one_day = 60 * 60 * 24
        queried = []  # Successfully queried from these peers
        peers_try = []  # Try these peers

        # Wait for peers
        if not self.peers:
            self.announce()
            for wait in range(10):
                time.sleep(5 + wait)
                self.log.debug("Waiting for peers...")
                if self.peers:
                    break

        shuffled_peers = list(self.peers.values())
        random.shuffle(shuffled_peers)
        for peer in shuffled_peers:
            if peer.findConnection() and peer.connection.handshake.get("rev", 0) > 125:
                peers_try.insert(0, peer)  # Connected peer with rev above 125: add to the beginning
            elif len(peers_try) < 5:
                peers_try.append(peer)  # Backup peer: add to the end while the list has fewer than 5 entries

        if since is None:  # No since defined, download from last modification time-1day
            since = self.settings.get("modified", one_day) - one_day
        self.log.debug("Try to get listModifications from peers: %s since: %s" % (peers_try, since))

        # Three workers share the same peers_try and queried lists
        updaters = [gevent.spawn(self.updater, peers_try, queried, since) for _ in range(3)]

        gevent.joinall(updaters, timeout=10)  # Wait 10 sec to workers done query modifications
        if not queried:
            gevent.joinall(updaters, timeout=10)  # Wait another 10 sec if none of updaters finished

        time.sleep(0.1)
        self.log.debug("Queried listModifications from: %s" % queried)
        return queried

    # Update content.json from peers and download changed files
    # Return: None
    @util.Noparallel()
    def update(self, announce=False):
        """Refresh content.json from peers and download whatever turned out bad.

        announce: Call announce() before querying the peers.

        Return: None
        """
        self.content_manager.loadContent("content.json")  # Reload content.json
        self.content_updated = None  # Reset content updated time
        self.updateWebsocket(updating=True)
        if announce:
            self.announce()

        _queried = self.checkModifications()

        # Quick check and mark bad files based on file size
        self.storage.checkFiles(quick_check=True)

        _changed, _deleted = self.content_manager.loadContent("content.json")

        if self.bad_files:
            self.log.debug("Bad files: %s" % self.bad_files)
            self.download()

        total_size = self.content_manager.getTotalSize()
        self.settings["size"] = total_size  # Update site size
        self.updateWebsocket(updated=True)

    # Update site by redownload all content.json
    def redownloadContents(self):
        """Re-download every known content.json and wait until all of them finished."""
        content_threads = []
        for inner_path in list(self.content_manager.contents.keys()):
            res = self.needFile(inner_path, update=True, blocking=False)
            # needFile may answer True/False instead of an event (downloadContent
            # filters these the same way); only events can be waited on
            if res is not True and res is not False:
                content_threads.append(res)

        self.log.debug("Waiting %s content.json to finish..." % len(content_threads))
        gevent.joinall(content_threads)

    # Publish worker
    def publisher(self, inner_path, peers, published, limit, event_done=None):
        """Publish worker: send an "update" request for a file to peers.

        inner_path: File to publish; its size and body are read from self.storage.
        peers: List of peers to try; peers are popped from its front.
        published: List that peers answering with "ok" are appended to.
        limit: Stop once this many peers are in published.
        event_done: Optional event; set(True) is called on it when the worker stops.
        """
        file_size = self.storage.getSize(inner_path)
        body = self.storage.read(inner_path)

        # Address advertised to the peers in the update request
        tor_manager = self.connection_server.tor_manager
        if tor_manager.enabled and tor_manager.start_onions:
            my_ip = tor_manager.getOnion(self.address)
            if my_ip:
                my_ip += ".onion"
            my_port = config.fileserver_port
        else:
            my_ip = config.ip_external
            if self.connection_server.port_opened:
                my_port = config.fileserver_port
            else:
                my_port = 0

        while 1:
            if not peers or len(published) >= limit:
                if event_done:
                    event_done.set(True)
                break  # All peers done, or published enough
            peer = peers.pop(0)

            # Timeout: 5sec + size in kb, plus the last ping delay for connected peers
            timeout = 5 + int(file_size / 1024)
            if peer.connection and peer.connection.last_ping_delay:
                timeout += peer.connection.last_ping_delay

            # Stays as-is when gevent.Timeout silently aborts the request below
            result = {"exception": "Timeout"}

            for _ in range(2):  # Second try only after an exception or an empty answer
                try:
                    with gevent.Timeout(timeout, False):
                        result = peer.request("update", {
                            "site": self.address,
                            "inner_path": inner_path,
                            "body": body,
                            "peer": (my_ip, my_port)
                        })
                    if result:
                        break
                except Exception as err:
                    result = {"exception": Debug.formatException(err)}

            if result and "ok" in result:
                published.append(peer)
                self.log.info("[OK] %s: %s %s/%s" % (peer.key, result["ok"], len(published), limit))
            else:
                if result == {"exception": "Timeout"}:
                    peer.onConnectionError()
                self.log.info("[FAILED] %s: %s" % (peer.key, result))