Beispiel #1
0
    def get(self):
        """
        Return the wiki content of the article (not the whole html page).

        When the cached copy is older than ``self.cache_time`` seconds, the
        edit page (``self.edit_page``) is fetched over HTTP and the response
        body is passed to ``self.feed``; otherwise the cached
        ``self.content`` is reused.  In both cases ``self.is_empty`` is
        refreshed before returning.

        Returns ``self.content``.
        """
        age = int(time.time()) - self.last_get

        # Do not get article if cache is still ok
        if age > self.cache_time:
            if self.logger:
                # Arguments are passed in the same order as the labels in
                # the format string (start time first, then edit time).
                self.logger.debug(
                    "pre-GET wpStarttime: '%s', wpEdittime: '%s', cookies: '%s', time diff: %d\n",
                    self.wpStarttime, self.wpEdittime, self.cookie_str,
                    age)

            headers = {"User-agent": "WikipediaFS"}
            if self.cookie_str is not None:
                headers["Cookie"] = self.cookie_str

            conn = ExtendedHTTPConnection(self.host, self.port, self.https)
            try:
                if self.httpauth_username and self.httpauth_password:
                    conn.http_auth(self.httpauth_username,
                                   self.httpauth_password)

                conn.add_headers(headers)
                conn.request(self.edit_page)
                response = conn.getresponse()

                # Log http response
                if self.logger:
                    self.logger.info("HTTP GET %s" % self.edit_page)

                # Feed the response body to the parser; presumably this is
                # what refreshes self.content and the wp* fields -- confirm
                # against the enclosing class.
                self.feed(response.read())
            finally:
                # Release the connection even if the request or parsing fails.
                conn.close()

            self.last_get = int(time.time())
            if self.logger:
                self.logger.debug(
                    "post-GET wpStarttime: '%s', wpEdittime: '%s', cookies: '%s', time diff: %d\n",
                    self.wpStarttime, self.wpEdittime, self.cookie_str,
                    int(time.time()) - self.last_get)
        elif self.logger:
            self.logger.debug("Get %s from cache" % self.name)

        # This lets the filesystem layer quickly know whether the article
        # is empty.
        self.is_empty = not self.content.strip()

        return self.content
Beispiel #2
0
    def getCookieString(self):
        """
        Log in to the wiki and return the user's cookie string.

        The login page is fetched first (following 301/302 redirects) to
        obtain the optional ``wpLoginToken`` and the session cookie.  The
        credentials are then POSTed to ``self.login_page`` and the cookies
        of that response are collected.  The returned string will then have
        to be passed to an Article.

        Side effect: sets ``self.logintoken`` (None when no token is found).

        Returns the collected cookies joined with "; " when exactly four
        cookies were gathered (the last one is dropped), otherwise None
        after logging a warning.
        """
        # Upper bound on redirects so a redirect cycle cannot hang the login.
        max_redirects = 10

        printlog(self.logger, "debug",
                 "Logging in with username %s." % self.username)
        self.logintoken = None
        cookie_list = []
        conn = ExtendedHTTPConnection(self.host, self.port, self.https)

        try:
            if self.httpauth_username and self.httpauth_password:
                conn.http_auth(self.httpauth_username,
                               self.httpauth_password)

            headers = {
                "Content-type": "application/x-www-form-urlencoded",
                "User-agent": "WikipediaFS"
            }
            conn.add_headers(headers)

            # get login page body to receive logintoken and session cookie
            conn.request(self.login_page)
            response = conn.getresponse()

            # Read the body exactly once and keep it: it is needed both for
            # the debug log and for the token search below.  Presumably the
            # response is httplib-style, where a second read() yields
            # nothing.
            body = response.read()
            printlog(
                self.logger, "debug",
                "URL: %s, response status: %d, text: %s" %
                (self.login_page, response.status, body))

            # follow redirects (301 and 302 only)
            redirects = 0
            while response.status in (301, 302):
                location = response.getheader("Location")
                if not location or redirects >= max_redirects:
                    printlog(
                        self.logger, "warning",
                        "Giving up on redirect to %s after %d redirects." %
                        (location, redirects))
                    break
                printlog(
                    self.logger, "debug",
                    "Redirecting to %s due to status of %d." %
                    (location, response.status))
                conn.request(location)
                response = conn.getresponse()
                body = response.read()
                redirects += 1
                printlog(self.logger, "debug",
                         "Redirected: Status %d." % (response.status))

            match = re.search(r'wpLoginToken"\s*value="(\w*)"', body)
            printlog(self.logger, "debug", "Token Match: %s." % (match))

            if match:
                self.logintoken = match.group(1)
                printlog(self.logger, "debug",
                         "Login Token: %s." % (self.logintoken))

            # post login data and receive login cookies

            # If we have a login token, then we also need to send the cookie
            # from the initial connection.
            # If we don't, then doing so breaks the login.
            if self.logintoken:
                set_cookie = response.getheader("Set-Cookie")
                session_match = None
                if set_cookie:
                    session_match = re.search(r'(.*?);', set_cookie)
                if session_match:
                    token_session = session_match.group(1)
                    headers["Cookie"] = token_session
                    cookie_list.append(token_session)

            printlog(self.logger, "debug", "Headers:")
            printlog(self.logger, "debug", headers)

            params = {
                "wpName": self.username,
                "wpPassword": self.password,
                "wpRemember": "1",
                "wpLoginattempt": "Anmelden"
            }

            if self.logintoken:
                params["wpLoginToken"] = self.logintoken
            if self.domain:
                params["wpDomain"] = self.domain

            params = urllib.urlencode(params)

            conn.add_data(params)
            conn.add_headers(headers)
            conn.request(self.login_page)
            response = conn.getresponse()
            printlog(
                self.logger, "debug",
                "URL: %s, response status: %d, text: %s" %
                (self.login_page, response.status, response.read()))

            # Raw header lines look like "Set-Cookie: name=value; ...";
            # keep only the "name=value" part of each.
            in_cookie = re.compile(r': (.*?);')

            for cookie_value in response.msg.getallmatchingheaders("set-cookie"):
                it_matches = in_cookie.search(cookie_value)

                if it_matches:
                    cookie_list.append(it_matches.group(1))
        finally:
            # Release the connection even when a request fails.
            conn.close()

        printlog(self.logger, "debug", "cookie_list:")
        printlog(self.logger, "debug", cookie_list)

        # NOTE(review): exactly four cookies is treated as a successful
        # login and the last one is discarded; presumably this matches the
        # cookies a MediaWiki login sets -- confirm before changing.
        if len(cookie_list) == 4:
            cookie_list.pop()
            printlog(self.logger, "info",
                     "Logged in successfully with username %s" % self.username)
            return "; ".join(cookie_list)

        # The original message had two placeholders but only one argument,
        # which raised TypeError instead of reporting the failure.
        printlog(
            self.logger, "warning",
            "Could not log in with username %s: expected 4 cookies, got %d" %
            (self.username, len(cookie_list)))
        return None
Beispiel #3
0
    def set(self, text):
        """
        Submit ``text`` as the new content of the article.

        A ``[[Summary:...]]`` marker found in ``text`` is removed from the
        text and used as the edit summary.  The edit form is then POSTed to
        ``self.submit_page``.

        Side effects: updates ``self.content`` and ``self.is_empty`` and
        resets ``self.last_get`` so that the next ``get`` refetches the
        article.

        Returns True when ``text`` equals the current content (nothing is
        sent) or when the server answers with status 302; False otherwise.
        """
        if text == self.content:
            return True  # useless to continue further...

        if self.logger:
            # Arguments are passed in the same order as the labels in the
            # format string (start time first, then edit time).
            self.logger.debug(
                "POST wpStarttime: '%s', wpEdittime: '%s', cookies: '%s', time diff: %d\n",
                self.wpStarttime, self.wpEdittime, self.cookie_str,
                int(time.time()) - self.last_get)

        # Looking for a [[Summary:*]]
        regexp = r'((\[\[)((s|S)ummary:)(.*)(\]\])(( )*\n)?)'
        summary = re.search(regexp, text)
        if summary is not None:
            wpSummary = summary.group(5)
            # Strip the whole marker (and its trailing newline, if any).
            text = text.replace(summary.group(1), '')
        else:
            wpSummary = " "

        # wpEdittime is empty if the article is a new article
        params = {
            "wpTextbox1": text,
            "wpSummary": wpSummary,
            "wpEdittime": self.wpEdittime,
            "wpStarttime": self.wpStarttime,
            "wpSave": 1
        }

        # Needed for logged in edition
        if self.wpEditToken is not None:
            params["wpEditToken"] = self.wpEditToken

        params = urllib.urlencode(params)

        headers = {
            "Content-type": "application/x-www-form-urlencoded",
            "User-agent": "WikipediaFS"
        }

        if self.cookie_str is not None:
            headers["Cookie"] = self.cookie_str

        conn = ExtendedHTTPConnection(self.host, self.port, self.https)
        try:
            if self.httpauth_username and self.httpauth_password:
                conn.http_auth(self.httpauth_username,
                               self.httpauth_password)

            conn.add_headers(headers)
            conn.add_data(params)
            conn.request(self.submit_page)

            response = conn.getresponse()
            status = response.status

            # Log http response
            if self.logger:
                self.logger.info("HTTP POST %s" % self.submit_page)
                if status == 302:
                    self.logger.info("Succesful")
                elif status == 200:
                    self.logger.error(
                        "Problems occured %s\n" % response.read())
                    self.logger.debug("Headers: '%s'\n" % headers)
                    self.logger.debug("Text: '%s'\n" % text)
                else:
                    self.logger.info("%d \n %s " %
                                     (status, response.read()))
        finally:
            # Release the connection even if the request fails.
            conn.close()

        # NOTE(review): the local content is updated even when the POST was
        # not answered with 302; a retry with the same text then takes the
        # early return above unless get() has refreshed the content first.
        # Confirm whether this is intended.
        self.content = text

        # forces the article to be get next time
        # (wpEdittime and wpStarttime need to be updated)
        self.last_get = 0

        # This lets the filesystem layer quickly know whether the article
        # is empty.
        self.is_empty = not self.content.strip()

        # Did the write actually succeed?
        if status == 302:
            return True

        if self.logger:
            self.logger.debug("article.set: Returning false.\n")
        return False
Beispiel #4
0
    def getCookieString(self):
        """
        Log in to the wiki and return the user's cookie string.

        The login page is fetched first (following 301/302 redirects) to
        obtain the optional ``wpLoginToken`` and the session cookie.  The
        credentials are then POSTed to ``self.login_page`` and the cookies
        of that response are collected.  The returned string will then have
        to be passed to an Article.

        Side effect: sets ``self.logintoken`` (None when no token is found).

        Returns the collected cookies joined with "; " when exactly four
        cookies were gathered (the last one is dropped), otherwise None
        after logging a warning.
        """
        # Upper bound on redirects so a redirect cycle cannot hang the login.
        max_redirects = 10

        printlog(self.logger, "debug", "Logging in with username %s." % self.username)
        self.logintoken = None
        cookie_list = []
        conn = ExtendedHTTPConnection(self.host, self.port, self.https)

        try:
            if self.httpauth_username and self.httpauth_password:
                conn.http_auth(self.httpauth_username, self.httpauth_password)

            headers = {"Content-type": "application/x-www-form-urlencoded",
                       "User-agent" : "WikipediaFS"}
            conn.add_headers(headers)

            # get login page body to receive logintoken and session cookie
            conn.request(self.login_page)
            response = conn.getresponse()

            # Read the body exactly once and keep it: it is needed both for
            # the debug log and for the token search below.  Presumably the
            # response is httplib-style, where a second read() yields
            # nothing.
            body = response.read()
            printlog(self.logger, "debug", "URL: %s, response status: %d, text: %s" % (self.login_page, response.status, body))

            # follow redirects (301 and 302 only)
            redirects = 0
            while response.status in (301, 302):
                location = response.getheader("Location")
                if not location or redirects >= max_redirects:
                    printlog(self.logger, "warning", "Giving up on redirect to %s after %d redirects." % (location, redirects))
                    break
                printlog(self.logger, "debug", "Redirecting to %s due to status of %d." % (location, response.status))
                conn.request(location)
                response = conn.getresponse()
                body = response.read()
                redirects += 1
                printlog(self.logger, "debug", "Redirected: Status %d." % (response.status))

            match = re.search(r'wpLoginToken"\s*value="(\w*)"', body)
            printlog(self.logger, "debug", "Token Match: %s." % (match))

            if match:
                self.logintoken = match.group(1)
                printlog(self.logger, "debug", "Login Token: %s." % (self.logintoken))

            # post login data and receive login cookies

            # If we have a login token, then we also need to send the cookie
            # from the initial connection.
            # If we don't, then doing so breaks the login.
            if self.logintoken:
                set_cookie = response.getheader("Set-Cookie")
                session_match = None
                if set_cookie:
                    session_match = re.search(r'(.*?);', set_cookie)
                if session_match:
                    token_session = session_match.group(1)
                    headers["Cookie"] = token_session
                    cookie_list.append(token_session)

            printlog(self.logger, "debug", "Headers:")
            printlog(self.logger, "debug", headers)

            params = {"wpName":self.username, "wpPassword":self.password,
                      "wpRemember":"1", "wpLoginattempt":"Anmelden"}

            if self.logintoken:
                params["wpLoginToken"] = self.logintoken
            if self.domain:
                params["wpDomain"] = self.domain

            params = urllib.urlencode(params)

            conn.add_data(params)
            conn.add_headers(headers)
            conn.request(self.login_page)
            response = conn.getresponse()
            printlog(self.logger, "debug", "URL: %s, response status: %d, text: %s" % (self.login_page, response.status, response.read()))

            # Raw header lines look like "Set-Cookie: name=value; ...";
            # keep only the "name=value" part of each.
            in_cookie = re.compile(r': (.*?);')

            for cookie_value in response.msg.getallmatchingheaders("set-cookie"):
                it_matches = in_cookie.search(cookie_value)

                if it_matches:
                    cookie_list.append(it_matches.group(1))
        finally:
            # Release the connection even when a request fails.
            conn.close()

        printlog(self.logger, "debug", "cookie_list:")
        printlog(self.logger, "debug", cookie_list)

        # NOTE(review): exactly four cookies is treated as a successful
        # login and the last one is discarded; presumably this matches the
        # cookies a MediaWiki login sets -- confirm before changing.
        if len(cookie_list) == 4:
            cookie_list.pop()
            printlog(self.logger, "info",
                     "Logged in successfully with username %s" % self.username)
            return "; ".join(cookie_list)

        # The original message had two placeholders but only one argument,
        # which raised TypeError instead of reporting the failure.
        printlog(self.logger, "warning",
                 "Could not log in with username %s: expected 4 cookies, got %d" % (self.username, len(cookie_list)))
        return None