def getCookieString(self):
    """
    Log in to the wiki and return the user's cookie string.

    The returned string will then have to be passed to an Article.

    The login page is fetched first (following 301/302 redirects) to pick up
    the ``wpLoginToken`` form field and the session cookie; the credentials
    are then POSTed and the cookies set by that response are collected.

    Side effects:
        Sets ``self.logintoken`` to the token found on the login page, or
        None when the page carries no token.

    Returns:
        The collected cookies joined with "; " when exactly four cookies
        were gathered (the last one is dropped), otherwise None.
    """
    printlog(self.logger, "debug",
             "Logging in with username %s." % self.username)
    self.logintoken = None
    cookie_list = []
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "User-agent": "WikipediaFS",
    }

    conn = ExtendedHTTPConnection(self.host, self.port, self.https)
    try:
        if self.httpauth_username and self.httpauth_password:
            conn.http_auth(self.httpauth_username, self.httpauth_password)
        conn.add_headers(headers)

        # Get the login page body to receive the login token and the
        # session cookie.  The body is read exactly once and kept: reading
        # it only for the debug log would leave nothing for the token
        # search below.
        conn.request(self.login_page)
        response = conn.getresponse()
        body = response.read()
        printlog(self.logger, "debug",
                 "URL: %s, response status: %d, text: %s"
                 % (self.login_page, response.status, body))

        # Follow permanent (301) and temporary (302) redirects.
        while response.status in (301, 302):
            location = response.getheader("Location")
            printlog(self.logger, "debug",
                     "Redirecting to %s due to status of %d."
                     % (location, response.status))
            if not location:
                # A redirect without a target cannot be followed.
                break
            conn.request(location)
            response = conn.getresponse()
            body = response.read()
            printlog(self.logger, "debug",
                     "Redirected: Status %d." % (response.status))

        match = re.search(r'wpLoginToken"\s*value="(\w*)"', body)
        printlog(self.logger, "debug", "Token Match: %s." % (match))
        if match:
            self.logintoken = match.group(1)
            printlog(self.logger, "debug",
                     "Login Token: %s." % (self.logintoken))

        # Post login data and receive login cookies.
        # If we have a login token, then we also need to send the cookie
        # from the initial connection.  If we don't, then doing so breaks
        # the login.
        set_cookie = response.getheader("Set-Cookie")
        session = re.search(r'(.*?);', set_cookie) if set_cookie else None
        if self.logintoken and session:
            token_session = session.group(1)
            headers["Cookie"] = token_session
            cookie_list.append(token_session)
        printlog(self.logger, "debug", "Headers:")
        printlog(self.logger, "debug", headers)

        params = {
            "wpName": self.username,
            "wpPassword": self.password,
            "wpRemember": "1",
            "wpLoginattempt": "Anmelden",
        }
        if self.logintoken:
            params["wpLoginToken"] = self.logintoken
        if self.domain:
            params["wpDomain"] = self.domain

        conn.add_data(urllib.urlencode(params))
        conn.add_headers(headers)
        conn.request(self.login_page)
        response = conn.getresponse()
        printlog(self.logger, "debug",
                 "URL: %s, response status: %d, text: %s"
                 % (self.login_page, response.status, response.read()))

        # Each raw header line looks like "Set-Cookie: name=value; ...";
        # keep only the "name=value" part.
        in_cookie = re.compile(r': (.*?);')
        for cookie_value in response.msg.getallmatchingheaders("set-cookie"):
            it_matches = in_cookie.search(cookie_value)
            if it_matches:
                cookie_list.append(it_matches.group(1))
    finally:
        # Release the connection even when a request or parse step fails.
        conn.close()

    printlog(self.logger, "debug", "cookie_list:")
    printlog(self.logger, "debug", cookie_list)

    if len(cookie_list) == 4:
        # NOTE(review): the last cookie is discarded on success; the reason
        # is not evident from this code - confirm before changing.
        cookie_list.pop()
        printlog(self.logger, "info",
                 "Logged in successfully with username %s" % self.username)
        return "; ".join(cookie_list)

    # The format string takes two values; supplying only the username
    # raised TypeError instead of reporting the failed login.
    reason = "expected 4 cookies, got %d" % len(cookie_list)
    printlog(self.logger, "warning",
             "Could not log in with username %s: %s"
             % (self.username, reason))
    return None
def get(self):
    """
    Gets the wiki content (not the whole html page).

    The edit page is fetched and fed to the parser only when the cached
    copy is older than ``self.cache_time`` seconds; otherwise the cached
    ``self.content`` is reused.

    Side effects:
        Updates ``self.last_get`` after a fetch and always refreshes
        ``self.is_empty``.

    Returns:
        The current value of ``self.content``.
    """
    # The logger is optional: every logging call below is guarded so that
    # an article without a logger can still be fetched.
    log = self.logger

    # Do not get article if cache is still ok
    if int(time.time()) - self.last_get > self.cache_time:
        if log:
            # Arguments are passed in the same order as the labels in the
            # message (wpStarttime first, then wpEdittime).
            log.debug(
                "pre-GET wpStarttime: '%s', wpEdittime: '%s', cookies: '%s', time diff: %d\n",
                self.wpStarttime, self.wpEdittime, self.cookie_str,
                int(time.time()) - self.last_get)

        headers = {"User-agent": "WikipediaFS"}
        if self.cookie_str is not None:
            headers["Cookie"] = self.cookie_str

        conn = ExtendedHTTPConnection(self.host, self.port, self.https)
        try:
            if self.httpauth_username and self.httpauth_password:
                conn.http_auth(self.httpauth_username,
                               self.httpauth_password)
            conn.add_headers(headers)
            conn.request(self.edit_page)
            response = conn.getresponse()

            # Log http response
            if log:
                log.info("HTTP GET %s" % self.edit_page)

            # Feeds the SGMLparser
            self.feed(response.read())
        finally:
            # Close the connection even if the request or parsing fails.
            conn.close()

        self.last_get = int(time.time())

        if log:
            # last_get was just refreshed, so the time diff logged here is
            # (close to) zero.
            log.debug(
                "post-GET wpStarttime: '%s', wpEdittime: '%s', cookies: '%s', time diff: %d\n",
                self.wpStarttime, self.wpEdittime, self.cookie_str,
                int(time.time()) - self.last_get)
    elif log:
        log.debug("Get %s from cache" % self.name)

    # This allows the fs to quickly know whether the article is empty
    self.is_empty = len(self.content.strip()) == 0

    return self.content
def set(self, text):
    """
    Submit ``text`` as the new content of the article.

    A ``[[Summary:...]]`` marker found in the text is removed from it and
    its content is sent as the edit summary.

    Args:
        text: the new wiki text of the article.

    Side effects:
        Stores the submitted text in ``self.content``, resets
        ``self.last_get`` to 0 so the next ``get`` refetches the page, and
        refreshes ``self.is_empty``.

    Returns:
        True when the text is unchanged or the server answers the POST with
        a 302 redirect, False otherwise.
    """
    if text == self.content:
        return True  # useless to continue further...

    # The logger is optional: every logging call below is guarded so that
    # saving works without one.
    log = self.logger

    if log:
        # Arguments are passed in the same order as the labels in the
        # message (wpStarttime first, then wpEdittime).
        log.debug(
            "POST wpStarttime: '%s', wpEdittime: '%s', cookies: '%s', time diff: %d\n",
            self.wpStarttime, self.wpEdittime, self.cookie_str,
            int(time.time()) - self.last_get)

    # Looking for a [[Summary:*]]
    regexp = r'((\[\[)((s|S)ummary:)(.*)(\]\])(( )*\n)?)'
    summary = re.search(regexp, text)
    if summary is not None:
        wpSummary = summary.group(5)
        text = text.replace(summary.group(1), '')
    else:
        wpSummary = " "

    # wpEdittime is empty if the article is a new article
    params = {
        "wpTextbox1": text,
        "wpSummary": wpSummary,
        "wpEdittime": self.wpEdittime,
        "wpStarttime": self.wpStarttime,
        "wpSave": 1,
    }

    # Needed for logged in edition
    if self.wpEditToken is not None:
        params["wpEditToken"] = self.wpEditToken

    params = urllib.urlencode(params)

    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "User-agent": "WikipediaFS",
    }
    if self.cookie_str is not None:
        headers["Cookie"] = self.cookie_str

    conn = ExtendedHTTPConnection(self.host, self.port, self.https)
    try:
        if self.httpauth_username and self.httpauth_password:
            conn.http_auth(self.httpauth_username, self.httpauth_password)
        conn.add_headers(headers)
        conn.add_data(params)
        conn.request(self.submit_page)
        response = conn.getresponse()
        status = response.status

        # Log http response
        if log:
            log.info("HTTP POST %s" % self.submit_page)
            if status == 302:
                log.info("Succesful")
            elif status == 200:
                log.error("Problems occured %s\n" % response.read())
                log.debug("Headers: '%s'\n" % headers)
                log.debug("Text: '%s'\n" % text)
            else:
                log.info("%d \n %s " % (status, response.read()))
    finally:
        # Close the connection even if the request fails part-way.
        conn.close()

    self.content = text

    # forces the article to be get next time
    # (wpEdittime and wpStarttime need to be updated)
    self.last_get = 0

    # This allows the fs to quickly know whether the article is empty
    self.is_empty = len(self.content.strip()) == 0

    # Did the write actually succeed?
    if status == 302:
        return True

    if log:
        log.debug("article.set: Returning false.\n")
    return False
def getCookieString(self):
    """
    Log in to the wiki and return the user's cookie string.

    The returned string will then have to be passed to an Article.

    The login page is fetched first (following 301/302 redirects) to pick up
    the ``wpLoginToken`` form field and the session cookie; the credentials
    are then POSTed and the cookies set by that response are collected.

    Side effects:
        Sets ``self.logintoken`` to the token found on the login page, or
        None when the page carries no token.

    Returns:
        The collected cookies joined with "; " when exactly four cookies
        were gathered (the last one is dropped), otherwise None.
    """
    printlog(self.logger, "debug",
             "Logging in with username %s." % self.username)
    self.logintoken = None
    cookie_list = []
    headers = {
        "Content-type": "application/x-www-form-urlencoded",
        "User-agent": "WikipediaFS",
    }

    conn = ExtendedHTTPConnection(self.host, self.port, self.https)
    try:
        if self.httpauth_username and self.httpauth_password:
            conn.http_auth(self.httpauth_username, self.httpauth_password)
        conn.add_headers(headers)

        # Get the login page body to receive the login token and the
        # session cookie.  The body is read exactly once and kept: reading
        # it only for the debug log would leave nothing for the token
        # search below.
        conn.request(self.login_page)
        response = conn.getresponse()
        body = response.read()
        printlog(self.logger, "debug",
                 "URL: %s, response status: %d, text: %s"
                 % (self.login_page, response.status, body))

        # Follow permanent (301) and temporary (302) redirects.
        while response.status in (301, 302):
            location = response.getheader("Location")
            printlog(self.logger, "debug",
                     "Redirecting to %s due to status of %d."
                     % (location, response.status))
            if not location:
                # A redirect without a target cannot be followed.
                break
            conn.request(location)
            response = conn.getresponse()
            body = response.read()
            printlog(self.logger, "debug",
                     "Redirected: Status %d." % (response.status))

        match = re.search(r'wpLoginToken"\s*value="(\w*)"', body)
        printlog(self.logger, "debug", "Token Match: %s." % (match))
        if match:
            self.logintoken = match.group(1)
            printlog(self.logger, "debug",
                     "Login Token: %s." % (self.logintoken))

        # Post login data and receive login cookies.
        # If we have a login token, then we also need to send the cookie
        # from the initial connection.  If we don't, then doing so breaks
        # the login.
        set_cookie = response.getheader("Set-Cookie")
        session = re.search(r'(.*?);', set_cookie) if set_cookie else None
        if self.logintoken and session:
            token_session = session.group(1)
            headers["Cookie"] = token_session
            cookie_list.append(token_session)
        printlog(self.logger, "debug", "Headers:")
        printlog(self.logger, "debug", headers)

        params = {
            "wpName": self.username,
            "wpPassword": self.password,
            "wpRemember": "1",
            "wpLoginattempt": "Anmelden",
        }
        if self.logintoken:
            params["wpLoginToken"] = self.logintoken
        if self.domain:
            params["wpDomain"] = self.domain

        conn.add_data(urllib.urlencode(params))
        conn.add_headers(headers)
        conn.request(self.login_page)
        response = conn.getresponse()
        printlog(self.logger, "debug",
                 "URL: %s, response status: %d, text: %s"
                 % (self.login_page, response.status, response.read()))

        # Each raw header line looks like "Set-Cookie: name=value; ...";
        # keep only the "name=value" part.
        in_cookie = re.compile(r': (.*?);')
        for cookie_value in response.msg.getallmatchingheaders("set-cookie"):
            it_matches = in_cookie.search(cookie_value)
            if it_matches:
                cookie_list.append(it_matches.group(1))
    finally:
        # Release the connection even when a request or parse step fails.
        conn.close()

    printlog(self.logger, "debug", "cookie_list:")
    printlog(self.logger, "debug", cookie_list)

    if len(cookie_list) == 4:
        # NOTE(review): the last cookie is discarded on success; the reason
        # is not evident from this code - confirm before changing.
        cookie_list.pop()
        printlog(self.logger, "info",
                 "Logged in successfully with username %s" % self.username)
        return "; ".join(cookie_list)

    # The format string takes two values; supplying only the username
    # raised TypeError instead of reporting the failed login.
    reason = "expected 4 cookies, got %d" % len(cookie_list)
    printlog(self.logger, "warning",
             "Could not log in with username %s: %s"
             % (self.username, reason))
    return None