def grab_ticket(params, wait, lock, thread_id):
    global stop
    cj = CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    buy_params, headers = init_session(opener, params)
    while stop == False and buy_params == None:
        cj.clear()
        buy_params, headers = init_session(opener, params)
    # wait
    wait_time = (23 * 60 + 30) * 60 + 45
    while stop == False and wait == True:
        now_time = datetime.now()
        current_time = (now_time.hour * 60 + now_time.minute) * 60 + now_time.second
        if current_time >= wait_time:
            cj.clear()
            buy_params, headers = init_session(opener, params)
            break
    # start
    while stop == False and buy_ticket(opener, buy_params, headers, lock, thread_id) == False:
        diff_days = abs(datetime.now().date() - datetime.strptime(
            buy_params['getin_date'][:10], '%Y/%m/%d').date()).days
        buy_params['getin_date'] = '%s-%d' % (buy_params['getin_date'][:10], diff_days)
        time.sleep(3)
    stop = True
    thread.exit()
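# The magic number in the wait loop above encodes a wall-clock deadline as
# seconds since midnight: (23 * 60 + 30) * 60 + 45 == 84645, i.e. 23:30:45.
# The loop compares it against datetime.now() converted the same way:
from datetime import datetime

wait_time = (23 * 60 + 30) * 60 + 45   # 23:30:45 as seconds since midnight
now = datetime.now()
current_time = (now.hour * 60 + now.minute) * 60 + now.second
ready = current_time >= wait_time      # True once 23:30:45 has passed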
def clear(self, domain=None, path=None, name=None):
    if issubclass(CookieJar, object):
        super(KeyringCookieJar, self).clear(domain, path, name)
    else:
        # old-style class in Python 2
        CookieJar.clear(self, domain, path, name)
    self.nuke()
    self.save()
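# The dispatch above exists because Python 2's cookielib.CookieJar is an
# old-style class, so super() cannot be used on it; under Python 3 the module
# becomes http.cookiejar, where every class is new-style and the super()
# branch applies. A minimal persistent-jar sketch of the same pattern (the
# class name and save() hook are illustrative, not from the snippet above):
import cookielib

class PersistentCookieJar(cookielib.CookieJar):
    def clear(self, domain=None, path=None, name=None):
        # Old-style base class: call the unbound method explicitly.
        cookielib.CookieJar.clear(self, domain, path, name)
        self.save()

    def save(self):
        pass  # persist the remaining cookies somewhere durable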
class Yad2Client(object):
    def __init__(self):
        proxy = ProxyHandler(PROXY)
        self.cj = CookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cj), proxy)
        install_opener(opener)

    def add_cookie(self, name, value):
        cookie = Cookie(version=0, name=name, value=value, port=None,
                        port_specified=False, domain='yad2.co.il',
                        domain_specified=False, domain_initial_dot=False,
                        path='/', path_specified=True, secure=False,
                        expires=None, discard=True, comment=None,
                        comment_url=None, rest={'HttpOnly': None},
                        rfc2109=False)
        self.cj.set_cookie(cookie)

    def clear_cookies(self):
        self.cj.clear()

    def get_url(self, url, headers=None, args=None):
        # None defaults instead of mutable {} defaults, which would be
        # shared (and mutated) across calls
        headers = headers or {}
        headers["Host"] = "m.yad2.co.il"
        headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
        headers["User-Agent"] = "Mozilla/5.0 (Linux; Android 4.2.2; Android SDK built for x86 Build/KK) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36"
        headers["Accept-Language"] = "en-US"
        args = args or {}
        args["DeviceType"] = "Redmi Note 3"
        args["AppVersion"] = "2.9"
        args["AppType"] = "Android"
        args["OSVersion"] = "5.0.2"
        args["udid"] = "582ffa3d-a4cf-425a-8b36-9874d7464015"
        url = url + "?" + urlencode(args)
        req = Request(url, headers=headers)
        response = urlopen(req)
        return response.read()
def get_cookies_from_response(url):
    cookiejar = CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
    opener.open(url)
    # add a new cookie (or replace an old one)
    newcookie = make_cookie('newcookie', '11111', '.baidu.com', '/')
    cookiejar.set_cookie(newcookie)
    # remove a cookie
    cookiejar.clear('.baidu.com', '/', 'newcookie')
    return cookiejar
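# make_cookie() is not defined in the snippet above; a plausible helper,
# matching the (name, value, domain, path) call and cookielib.Cookie's
# constructor, might look like this (a sketch, not the original code):
import cookielib

def make_cookie(name, value, domain, path):
    return cookielib.Cookie(
        version=0, name=name, value=value,
        port=None, port_specified=False,
        domain=domain, domain_specified=True,
        domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=True,
        secure=False, expires=None, discard=True,
        comment=None, comment_url=None, rest={}, rfc2109=False)

# Note: CookieJar.clear(domain, path, name) removes exactly one cookie and
# raises KeyError if no cookie matches all three selectors.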
class HLSFetcher(object):
    def __init__(self, url, **options):
        self.program = options.get('program', 1)
        self.hls_headers = options.get('headers', {})
        self.path = options.get('path', None)
        self.bitrate = options.get('bitrate', 200000)
        self.nbuffer = options.get('buffer', 5)
        self.n_segments_keep = options.get('keep', self.nbuffer + 1)
        url = urllib.unquote(url)
        self.puser = options.get('puser')
        self.ppass = options.get('ppass')
        self.purl = options.get('purl')
        us = url.split('|')
        if len(us) > 1:
            self.url = us[0]
            for hd in us[1:]:
                self.hls_headers.update(dict(urlparse.parse_qsl(hd.strip())))
        else:
            self.url = url
        self.agent = self.hls_headers.pop('User-Agent', getUserAgent())
        if not self.path:
            self.path = tempfile.mkdtemp()

        self._program_playlist = None
        self._file_playlist = None
        self._cookies = CookieJar()
        self._cached_files = {}  # sequence n -> path
        self._run = True
        self._poolHelper = TwHTTP11PoolHelper(retryAutomatically=True)

        self._files = None            # the iter of the playlist files download
        self._next_download = None    # the delayed download defer, if any
        self._file_playlisted = None  # the defer to wait until new files are added to playlist
        self._new_filed = None
        self._seg_task = None

    def _get_page(self, url):
        url = url.encode("utf-8")
        if 'HLS_RESET_COOKIES' in os.environ.keys():
            self._cookies.clear()
        timeout = 10
        return twAgentGetPage(url, agent=self.agent, cookieJar=self._cookies,
                              headers=self.hls_headers, timeout=timeout,
                              pool=self._poolHelper._pool, proxy_url=self.purl,
                              p_user=self.puser, p_pass=self.ppass)

    def _download_page(self, url, path, file):
        def _decrypt(data):
            def num_to_iv(n):
                iv = struct.pack(">8xq", n)
                return b"\x00" * (16 - len(iv)) + iv

            if not self._file_playlist._iv:
                iv = num_to_iv(file['sequence'])
                aes = AES.new(self._file_playlist._key, AES.MODE_CBC, iv)
            else:
                aes = AES.new(self._file_playlist._key, AES.MODE_CBC,
                              self._file_playlist._iv)
            return aes.decrypt(data)

        d = self._get_page(url)
        if self._file_playlist._key:
            d.addCallback(_decrypt)
        return d

    def _download_segment(self, f):
        url = make_url(self._file_playlist.url, f['file'])
        name = 'seg_' + next(tempfile._get_candidate_names())
        path = os.path.join(self.path, name)
        d = self._download_page(url, path, f)
        if self.n_segments_keep != 0:
            file = open(path, 'wb')
            d.addCallback(lambda x: file.write(x))
            d.addBoth(lambda _: file.close())
            d.addCallback(lambda _: path)
            d.addErrback(self._got_file_failed)
            d.addCallback(self._got_file, url, f)
        else:
            d.addCallback(lambda _: (None, path, f))
        return d

    def delete_cache(self, f):
        bgFileEraser = eBackgroundFileEraser.getInstance()
        keys = self._cached_files.keys()
        for i in ifilter(f, keys):
            filename = self._cached_files[i]
            bgFileEraser.erase(str(filename))
            del self._cached_files[i]

    def delete_all_cache(self):
        bgFileEraser = eBackgroundFileEraser.getInstance()
        for path in self._cached_files.itervalues():
            bgFileEraser.erase(str(path))
        self._cached_files.clear()

    def _got_file_failed(self, e):
        if self._new_filed:
            self._new_filed.errback(e)
            self._new_filed = None

    def _got_file(self, path, url, f):
        self._cached_files[f['sequence']] = path
        if self.n_segments_keep != -1:
            self.delete_cache(lambda x: x <= f['sequence'] - self.n_segments_keep)
        if self._new_filed:
            self._new_filed.callback((path, url, f))
            self._new_filed = None
        return (path, url, f)

    def _get_next_file(self):
        next = self._files.next()
        if next:
            return self._download_segment(next)
        elif not self._file_playlist.endlist():
            self._seg_task.stop()
            self._file_playlisted = defer.Deferred()
            self._file_playlisted.addCallback(lambda x: self._get_next_file())
            self._file_playlisted.addCallback(self._next_file_delay)
            self._file_playlisted.addCallback(self._seg_task.start)
            return self._file_playlisted

    def _handle_end(self, failure):
        failure.trap(StopIteration)
        print "End of media"

    def _next_file_delay(self, f):
        if f == None:
            return 0
        delay = f[2]["duration"]
        if self.nbuffer > 0:
            for i in range(0, self.nbuffer):
                if self._cached_files.has_key(f[2]['sequence'] - i):
                    return delay
            delay = 0
        elif self._file_playlist.endlist():
            delay = 1
        return delay

    def _get_files_loop(self, res=None):
        if not self._seg_task:
            self._seg_task = task.LoopingCall(self._get_next_file)
        d = self._get_next_file()
        if d != None:
            self._seg_task.stop()
            d.addCallback(self._next_file_delay)
            d.addCallback(self._seg_task.start)
            d.addErrback(self._handle_end)

    def _playlist_updated(self, pl):
        if pl and pl.has_programs():
            # if we got a program playlist, save it and start a program
            self._program_playlist = pl
            (program_url, _) = pl.get_program_playlist(self.program, self.bitrate)
            return self._reload_playlist(M3U8(program_url, self._cookies,
                                              self.hls_headers))
        elif pl and pl.has_files():
            # we got a sequence playlist; start reloading it regularly, and get files
            self._file_playlist = pl
            if not self._files:
                self._files = pl.iter_files()
            if not pl.endlist():
                reactor.callLater(pl.reload_delay(), self._reload_playlist, pl)
            if self._file_playlisted:
                self._file_playlisted.callback(pl)
                self._file_playlisted = None
        else:
            raise Exception('Playlist has no valid content.')
        return pl

    def _got_playlist_content(self, content, pl):
        if not pl.update(content) and self._run:
            # if the playlist cannot be loaded, start a reload timer
            d = deferLater(reactor, pl.reload_delay(), self._fetch_playlist, pl)
            d.addCallback(self._got_playlist_content, pl)
            return d
        return pl

    def _fetch_playlist(self, pl):
        d = self._get_page(pl.url)
        return d

    def _reload_playlist(self, pl):
        if self._run:
            d = self._fetch_playlist(pl)
            d.addCallback(self._got_playlist_content, pl)
            d.addCallback(self._playlist_updated)
            return d
        else:
            return None

    def get_file(self, sequence):
        d = defer.Deferred()
        keys = self._cached_files.keys()
        try:
            endlist = sequence == self._file_playlist._end_sequence
            sequence = ifilter(lambda x: x >= sequence, keys).next()
            filename = self._cached_files[sequence]
            d.callback((filename, endlist))
        except:
            d.addCallback(lambda x: self.get_file(sequence))
            self._new_filed = d
            keys.sort()
        return d

    def _start_get_files(self, x):
        self._new_filed = defer.Deferred()
        self._get_files_loop()
        return self._new_filed

    def start(self):
        if self._run:
            self._files = None
            d = self._reload_playlist(M3U8(self.url, self._cookies, self.hls_headers))
            d.addCallback(self._start_get_files)
            return d

    def stop(self):
        self._run = False
        self._poolHelper.close()
        if self._seg_task != None:
            self._seg_task.stop()
        if self._new_filed != None:
            self._new_filed.cancel()
        reactor.callLater(1, self.delete_all_cache)
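# The num_to_iv() helper above follows the HLS convention (RFC 8216): when an
# EXT-X-KEY tag carries no IV attribute, the IV defaults to the segment's
# media sequence number as a 16-byte big-endian integer. A standalone sketch
# of the same decryption step (PyCrypto, as in the class above; key is an
# assumed 16-byte AES-128 key):
import struct
from Crypto.Cipher import AES

def hls_default_iv(sequence):
    # ">8xq" already yields 16 bytes: 8 zero-pad bytes + an 8-byte
    # big-endian integer, so no extra padding is needed.
    return struct.pack(">8xq", sequence)

def decrypt_segment(data, key, sequence, iv=None):
    return AES.new(key, AES.MODE_CBC, iv or hls_default_iv(sequence)).decrypt(data)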
class Downloader(object):
    def __init__(self, username=None, password=None, debug=False,
                 naptime=True, user_agent=DEFAULT_USER_AGENT):
        self.sleep_after_request = naptime
        self.user_agent = user_agent
        self.debug = debug
        # Try setting the username from args; if missing, check the authfile
        self.username = username or auth.lc_username
        self.password = password or auth.lc_password
        self.logged_in = False
        self.cookie_jar = CookieJar()
        if self.debug:
            # Noisy HTTPS handler for debugging
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar),
                HTTPSHandler(debuglevel=1))
        else:
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar))
        self.url_opener.addheaders = [('User-Agent', self.user_agent)]
        logging.info('Downloader initialized.')

    def open_url(self, url, data=None, method='GET'):
        """ Consistent place to introduce request throttling and other HTTP magic """
        if method == 'GET':
            if data:
                url += "?" + urlencode(data, True)
            response = self.url_opener.open(url)
        elif method == 'POST':
            response = self.url_opener.open(url, data=urlencode(data))
        else:
            raise ValueError("%s is not a valid HTTP method" % method)
        if self.sleep_after_request:
            sleep_time = random.randint(MIN_SLEEP, MAX_SLEEP)
            logging.debug('Taking a nap for %s seconds', sleep_time)
            time.sleep(sleep_time)
        return response

    def verify_login(self, resp=None):
        """ Tries to fetch the Account Summary page; returns True if it succeeds

        Args:
            resp (HTTPResponse, optional) - reuse resp instead of re-querying
        Returns:
            True if we're actually logged in; also updates self.logged_in
        """
        if not resp:
            resp = self.open_url(ACCOUNT_SUMMARY_URL)
        resp_text = resp.read()
        # Look for a known tag that appears only for logged in users
        if resp_text.find(LOGGED_IN_VALIDATION) >= 0:
            self.logged_in = True
        else:
            self.logged_in = False
        return self.logged_in

    def logout(self):
        try:
            self.cookie_jar.clear('.lendingclub.com')
        except KeyError:
            pass
        logging.debug('Cleared cookies')

    def login(self, invalidate_session=False, retries=5):
        """ Sets an actively logged in session with Lending Club.

        Login Steps:
          1. Get a set of session cookies by visiting ACCOUNT_SUMMARY_URL
          2. Authenticate the session cookies with a username / password
        If self.logged_in is already set, this will verify that we're logged in

        Args:
            invalidate_session (bool): clear cookies and log in with a new session
            retries (int): number of times to retry on unsuccessful login
        Returns:
            True if login was successful; also updates self.logged_in
        """
        if self.logged_in and not invalidate_session and self.verify_login():
            # Ensure we're logged in and aren't trying to reset our session
            logging.debug('Ensuring that we already have an active session')
            return self.logged_in

        if not self.logged_in or invalidate_session:
            # Start a new session: clear cookies and send a fresh request
            self.logout()
            logging.debug('Starting a fresh Lending Club session..')
            self.open_url(ACCOUNT_SUMMARY_URL)
            logging.debug('session cookies: %s', self.cookie_jar)

        attempt = 1
        while attempt <= retries:
            # Username and password only need to be input once
            if not self.username:
                self.username = raw_input('Lending Club username:\n')
            if not self.password:
                self.password = getpass('Password:\n')
            data = {
                'login_url': ACCOUNT_SUMMARY_URL,
                'login_email': self.username,
                'login_password': self.password,
                'login_remember_me': 'off',
            }
            response = self.open_url(LOGIN_URL, data, 'POST')
            # Validate the login attempt
            if self.verify_login(response):
                self.logged_in = True
                # We don't need the LC_FIRSTNAME cookie that was just set
                self.cookie_jar.clear('.lendingclub.com', '/', 'LC_FIRSTNAME')
                logging.info('Successfully logged in as %s', self.username)
                break
            else:
                self.username = None
                self.password = None
                self.logged_in = False
                if attempt < retries:
                    logging.warning('Login attempt %s of %s failed. Will try again.',
                                    attempt, retries)
                else:
                    logging.critical('Last login attempt %s failed.', attempt)
            attempt += 1
        return self.logged_in

    def set_query_params(self):
        """ Before making requests to NOTES_URL, we need to set the high-level
        search params, like the interest rates and loan status. Query params
        are associated with the session on the server side.
        """
        request_params = {
            'mode': 'search',
            'search_from_rate': '0.04',
            'search_to_rate': '0.26',
            'search_status': ['status_always_current', 'satus_current',
                              'status_late_16_30', 'status_late_31_120'],
            'search_remaining_payments': '60',
            'x': '23',
            'y': '10',
        }
        logging.debug('Setting up the query params..')
        self.open_url(QUERY_PARAMS_URL, request_params)

    def get_page_of_notes(self, sort='ytm', sort_dir='desc', offset=0,
                          limit=10, retries=5):
        """ Given a session cookie, get a page of results in a JSON format """
        request_params = {
            'sortBy': sort,
            'dir': sort_dir,
            'newrdnnum': random.randint(10000000, 90000000),  # What is this?
            'startindex': offset,
            'pagesize': limit,
        }
        QUERY_STATUS_KEY = 'result'
        attempt = 1
        while attempt <= retries:
            try:
                response = self.open_url(NOTES_URL, request_params)
                response_data = response.readline()
                json_data = json.loads(response_data)
                query_status = json_data.get(QUERY_STATUS_KEY)
                if query_status == 'success':
                    return json_data
            except Exception as e:
                log_line = '[%d/%d]: Error parsing response: %s\n RESP: %s' % (
                    attempt, retries, e, response_data)
                logging.warning(log_line)
            else:
                log_line = '[%d/%d] Failed to fetch data. \n RESP: %s' % (
                    attempt, retries, json_data)
                logging.warning(log_line)
            attempt += 1
        # Escalate logging to ERROR if we fail fetching after many retries
        logging.critical('Error fetching page of notes after %d tries.\n > %s',
                         retries, log_line)
        return {}

    def download_data(self, max_records=1000, pagesize=1000, ignore_neg_ytm=False):
        """ Paginate through enough pages of results to get the desired number
        of records. Optionally ignore negative YTM to reduce the result set.
        """
        RECORD_COUNT_KEY = 'totalRecords'
        RESULT_SET_KEY = 'searchresult'
        LOANS_KEY = 'loans'
        # ensure we're logged in
        self.login()
        # Set the high-level search query params
        self.set_query_params()
        # How many results match the query?
        logging.info('Fetching the total matching record count for the query')
        total_record_count = int(
            self.get_page_of_notes(limit=1).get(RECORD_COUNT_KEY, 0))
        # How many results do we plan to fetch?
        record_limit = min(max_records, total_record_count)
        logging.info('Fetching up to %s of %s matching records',
                     record_limit, total_record_count)
        all_records = {}
        offset = 0
        while offset < record_limit:
            logging.debug('Fetched %s; getting %s more records from the site',
                          len(all_records), pagesize)
            # Set the query arguments and fetch the data in a nice dict
            query_args = {'offset': offset, 'limit': pagesize}
            if ignore_neg_ytm:
                # Start with positive YTM and descend; this allows ignoring negatives
                query_args['sort'] = 'ytm'
                query_args['sort_dir'] = 'desc'
            fetched_data = self.get_page_of_notes(**query_args)
            # Break out early if we're not getting sensible results
            if not fetched_data:
                break
            # Get a list of records from the result
            fetched_records = fetched_data.get(RESULT_SET_KEY, {}).get(LOANS_KEY, [])
            for record in fetched_records:
                # Break out if we've fetched all of the positive YTM records
                if ignore_neg_ytm and ((record.get('ytm') == 'null') or
                                       (float(record.get('ytm', 0)) < 0)):
                    logging.info('Fetched all %s records with a positive YTM',
                                 len(all_records))
                    return all_records
                record_id = record.get('noteId')
                if record_id in all_records:
                    raise KeyError("Looks like we got a duplicate record: %s" % record)
                all_records[record_id] = record
            offset += pagesize
        return all_records

    def download_historical_loan_data(self):
        logging.info('Downloading file from %s..', LOAN_DATA_CSV_URL)
        urlretrieve(LOAN_DATA_CSV_URL, LOAN_DATA_CSV_TMPFILE)
        logging.info('Done writing to %s', LOAN_DATA_CSV_TMPFILE)
        return parse_loan_data_from_file(LOAN_DATA_CSV_TMPFILE)
class Network(DOMMixin):

    capabilities = [
        'cookies',
        'headers',
    ]

    wait_expression = WaitExpression

    user_agent = {
        'browser': 'network',
        'platform': 'python',
        'version': '1.0',
    }

    def __init__(self, base_url=None):
        # accept additional request headers? (e.g. user agent)
        self._base_url = base_url
        self.reset()

    def open(self, url, wait_for=None, timeout=0):
        """Open web page at *url*."""
        self._open(url)

    def reset(self):
        self._referrer = None
        self._request_environ = None
        self._cookie_jar = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cookie_jar)
        )
        self.status_code = 0
        self.status = ''
        self.response = None
        self.location = None
        self.headers = ()

    def wait_for(self, condition, timeout=None):
        pass

    def sync_document(self):
        """The document is always synced."""

    _sync_document = DOMMixin.sync_document

    @property
    def cookies(self):
        if not (self._cookie_jar and self.location):
            return {}
        request = urllib2.Request(self.location)
        policy = self._cookie_jar._policy
        # return_ok will only return a cookie if the following attrs are set
        # correctly: "version", "verifiability", "secure", "expires",
        # "port", "domain"
        return dict((c.name, c.value.strip('"'))
                    for c in self._cookie_jar
                    if policy.return_ok(c, request))

    def set_cookie(self, name, value, domain=None, path=None,
                   session=True, expires=None, port=None):
        # Cookie(version, name, value, port, port_specified,
        #        domain, domain_specified, domain_initial_dot,
        #        path, path_specified, secure, expires,
        #        discard, comment, comment_url, rest,
        #        rfc2109=False)
        cookie = Cookie(0, name, value, port, bool(port),
                        domain or '', bool(domain),
                        (domain and domain.startswith('.')),
                        path or '', bool(path), False, expires,
                        session, None, None, {}, False)
        self._cookie_jar.set_cookie(cookie)

    def delete_cookie(self, name, domain=None, path=None):
        try:
            self._cookie_jar.clear(domain, path, name)
        except KeyError:
            pass

    # Internal methods

    @lazy_property
    def _lxml_parser(self):
        return html_parser_for(self, wsgi_elements)

    def _open(self, url, method='GET', data=None, refer=True, content_type=None):
        before_browser_activity.send(self)
        open_started = time()
        if data:
            data = urlencode(data)
        url = urljoin(self._base_url, url)
        if method == 'GET':
            if '?' in url:
                url, query_string = url.split('?', 1)
            else:
                query_string = None
            if data:
                query_string = data
            if query_string:
                url = url + '?' + query_string
            request = urllib2.Request(url)
        elif method == 'POST':
            request = urllib2.Request(url, data)
        else:
            raise Exception('Unsupported method: %s' % method)
        if self._referrer and refer:
            request.add_header('Referer', self._referrer)

        logger.info('%s(%s)', url, method)
        request_started = time()
        response = self._opener.open(request)
        request_ended = time()

        self.status_code = response.getcode()
        self.headers = Headers(
            (head.strip().split(': ', 1) for head in response.info().headers)
        )
        self._referrer = request.get_full_url()
        self.location = response.geturl()
        self._response = response
        self.response = ''.join(list(response))
        self._sync_document()

        open_ended = time()
        request_time = request_ended - request_started
        logger.info("Fetched %s in %0.3fsec + %0.3fsec browser overhead",
                    url, request_time, open_ended - open_started - request_time)
        after_browser_activity.send(self)
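# The cookies property above filters the jar through its CookiePolicy:
# return_ok() decides whether a given cookie would actually be sent on a
# given request. A compact sketch of the same idea (note that _policy is a
# private attribute, exactly as the property itself relies on):
import cookielib
import urllib2

def sendable_cookies(jar, url):
    request = urllib2.Request(url)
    policy = jar._policy
    return dict((c.name, c.value) for c in jar if policy.return_ok(c, request))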
class GHttp():
    def __init__(self):
        """ class initialisation; creates the cookie jar and handlers """
        self.lastpage = None
        self.lasterror = None
        self.cj = CookieJar()
        self.cookieH = urllib2.HTTPCookieProcessor(self.cj)
        self.redirectH = urllib2.HTTPRedirectHandler()
        self.proxyH = None
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)

    def addproxy(self, proxyipport):
        self.proxyH = urllib2.ProxyHandler({'http': proxyipport})
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH, self.proxyH)
        if self.rq('http://google.com') is None:
            return False
        return True

    def removeproxy(self):
        """ Removes the currently set proxy """
        self.proxyH = None
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)

    def clearcookies(self):
        """ clears all cookies from the cookie jar :) """
        self.cj.clear()

    def rq(self, url, ref=None, data=None):
        """ HTTP request; returns the response HTML, or None on error.

        Keyword arguments:
        url  -- the url you want to request
        data -- data for the POST method; the payload you will be sending
        ref  -- the referer for your request; if none is specified, the last
                page's url (or the current url) is used (default None)
        """
        # reset lasterror
        self.lasterror = None
        # set the referrer
        if ref is None:
            if self.lastpage is None:
                self.lastpage = url
            ref = self.lastpage
        self.opener.addheaders = [
            ('Referer', ref),
            ('User-Agent',
             'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 '
             '(KHTML, like Gecko) Chrome/9.0.597.98 Safari/534.13')
        ]
        # if data is a dictionary, urlencode it into form format
        if isinstance(data, dict):
            data = urllib.urlencode(data)
        # catch exceptions so the program does not crash
        try:
            if data is not None:
                opnr = self.opener.open(url, data=data)
            else:
                opnr = self.opener.open(url)
        except urllib2.HTTPError, e:
            self.lasterror = 'The server couldn\'t fulfill the request. ' \
                             'Error code: %s' % e.code
            return None
        except urllib2.URLError, e:
            self.lasterror = 'We failed to reach a server. Reason: %s' % e.reason
            return None
        # the snippet is truncated here; per the docstring, the success path
        # records the page and returns the response HTML
        self.lastpage = url
        return opnr.read()
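# A minimal usage sketch for the wrapper above (the URL and form fields are
# placeholders): rq() POSTs when data is given, records lasterror on
# failure, and clearcookies() empties the jar between sessions.
g = GHttp()
html = g.rq('http://example.com/login', data={'user': 'me', 'pw': 'secret'})
if html is None:
    print g.lasterror
g.clearcookies()  # start the next login from a cookie-less state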
class TestClassBase(unittest.TestCase):
    def setUp(self):
        self.maxDiff = None
        self.execOnTearDown = []
        self.cookies = CookieJar()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookies))
        self._assert_unlogged()
        self._login(admin_data[0], 'administrator')
        self._assert_logged(admin_data[0])
        self.admin_data = admin_data

    def _login(self, data, group):
        self.cookies.clear()
        encoded_credentials = urllib.urlencode({
            'username': data['username'],
            'password': data['password']
        })
        self.opener.open(url + "/auth/login", encoded_credentials)
        self.group = group

    def _assert_logged(self, data, notifications=None):
        json_out = self._request('/me', {})
        if notifications != None:
            self.assertEqual(
                {
                    'username': json_out['username'],
                    'group': json_out['group'],
                    'notifications': json_out['notifications']
                },
                {
                    'username': data['username'],
                    'group': self.group,
                    'notifications': notifications
                })
        else:
            self.assertEqual(
                {
                    'username': json_out['username'],
                    'group': json_out['group']
                },
                {
                    'username': data['username'],
                    'group': self.group
                })

    def _assert_unlogged(self):
        self.assertEqual(
            urllib2.urlopen(url + '/me').geturl(),
            url + '/auth/login')

    def _request(self, uri, json_in):
        request = urllib2.Request('https://localhost:9090/%s' % uri.lstrip('/'),
                                  data=json.dumps(json_in),
                                  headers={'Content-Type': 'application/json'})
        return json.loads(self.opener.open(request).read())

    def _assert_req(self, uri, json_in, json_expected):
        json_out = self._request(uri, json_in)
        # import pprint
        # print 'RETURNED:', pprint.pprint(clean_id(copy.deepcopy(json_out)))
        # print 'EXPECTED:', pprint.pprint(json_expected)
        # print 'EQ:', clean_id(copy.deepcopy(json_out)) == json_expected
        self.assertEqual(clean_id(copy.deepcopy(json_out)), json_expected)
        return json_out

    def tearDown(self):
        self._login(self.admin_data[0], 'administrator')
        self._assert_logged(self.admin_data[0])
        for command in self.execOnTearDown:
            uri, json_in, json_expected = command
            self.assertEqual(clean_id(self._request(uri, json_in)), json_expected)

    def _plain_request(self, uri=''):
        return self.opener.open('https://localhost:9090/' + uri).read()
class Downloader(object):
    def __init__(self, username=None, password=None, debug=False,
                 naptime=True, user_agent=DEFAULT_USER_AGENT):
        self.sleep_after_request = naptime
        self.user_agent = user_agent
        self.debug = debug
        # Try setting the username from args
        self.username = username
        self.password = password
        self.logged_in = False
        self.cookie_jar = CookieJar()
        if self.debug:
            # Noisy HTTPS handler for debugging
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar),
                HTTPSHandler(debuglevel=1))
        else:
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar))
        self.url_opener.addheaders = [('User-Agent', self.user_agent)]
        logging.info('Downloader initialized.')

    def open_url(self, url, data=None, method='GET', verify=False):
        """ Consistent place to introduce request throttling and other HTTP magic """
        if method != 'GET' and method != 'POST':
            raise ValueError("%s is not a valid HTTP method" % method)
        attempt = 1
        while attempt <= TIMEOUT_RETRY:
            try:
                if method == 'GET':
                    dataurl = url
                    if data:
                        dataurl += "?" + urlencode(data, True)
                    response = self.url_opener.open(dataurl, timeout=TIMEOUT)
                elif method == 'POST':
                    response = self.url_opener.open(url, data=urlencode(data),
                                                    timeout=TIMEOUT)
                if self.sleep_after_request:
                    sleep_time = random.randint(MIN_SLEEP, MAX_SLEEP)
                    # logging.debug('Taking a nap for %s seconds', sleep_time)
                    time.sleep(sleep_time)
                return response
            except socket.timeout as e:
                logging.warning("Timeout error: %s, [%d/%d] while fetching url %s with data %s",
                                type(e), attempt, TIMEOUT_RETRY, url, data)
            except Exception as e:
                logging.warning("Error caught: %s, [%d/%d] while fetching url %s with data %s",
                                type(e), attempt, TIMEOUT_RETRY, url, data)
            else:
                logging.warning("Failed to fetch url %s with data %s, [%d/%d]",
                                url, data, attempt, TIMEOUT_RETRY)
            attempt = attempt + 1
            if attempt <= TIMEOUT_RETRY:
                time.sleep(TIMEOUT_SLEEP)
                if verify and not self.verify_login():
                    self.login()
        # end attempt loop
        logging.critical('Error fetching url %s with data %s after %d tries.',
                         url, str(data), TIMEOUT_RETRY)
        return {}

    def verify_login(self, resp=None):
        """ Tries to fetch the Account Summary page; returns True if it succeeds

        Args:
            resp (HTTPResponse, optional) - reuse resp instead of re-querying
        Returns:
            True if we're actually logged in; also updates self.logged_in
        """
        response_got = False
        while not response_got:
            if not resp:
                resp = self.open_url(ACCOUNT_SUMMARY_URL)
            try:
                resp_text = resp.read()
                response_got = True
            except:
                logging.warning("verify_login: cannot read verify response")
                resp = None  # force a re-fetch on the next pass
        # end response_got loop
        # Look for a known tag that appears only for logged in users
        if resp_text.find(LOGGED_IN_VALIDATION) >= 0:
            self.logged_in = True
        else:
            self.logged_in = False
        return self.logged_in

    def logout(self):
        try:
            self.cookie_jar.clear('.lendingclub.com')
        except KeyError:
            pass
        logging.debug('Cleared cookies')

    def login(self, invalidate_session=False, retries=5):
        """ Sets an actively logged in session with Lending Club.

        Login Steps:
          1. Get a set of session cookies by visiting ACCOUNT_SUMMARY_URL
          2. Authenticate the session cookies with a username / password
        If self.logged_in is already set, this will verify that we're logged in

        Args:
            invalidate_session (bool): clear cookies and log in with a new session
            retries (int): number of times to retry on unsuccessful login
        Returns:
            True if login was successful; also updates self.logged_in
        """
        if self.logged_in and not invalidate_session and self.verify_login():
            # Ensure we're logged in and aren't trying to reset our session
            logging.debug('Ensuring that we already have an active session')
            return self.logged_in

        if not self.logged_in or invalidate_session:
            # Start a new session: clear cookies and send a fresh request
            self.logout()
            logging.debug('Starting a fresh Lending Club session..')
            self.open_url(ACCOUNT_SUMMARY_URL)
            logging.debug('session cookies: %s', self.cookie_jar)

        attempt = 1
        while attempt <= retries:
            # Username and password only need to be input once
            if not self.username:
                self.username = raw_input('Lending Club username:\n')
            if not self.password:
                self.password = getpass('Password:\n')
            data = {
                'login_url': ACCOUNT_SUMMARY_URL,
                'login_email': self.username,
                'login_password': self.password,
                'login_remember_me': 'off',
            }
            response = self.open_url(LOGIN_URL, data, 'POST')
            # Validate the login attempt
            if self.verify_login(response):
                self.logged_in = True
                # We don't need the LC_FIRSTNAME cookie that was just set
                self.cookie_jar.clear('.lendingclub.com', '/', 'LC_FIRSTNAME')
                logging.info('Successfully logged in as %s', self.username)
                break
            else:
                self.username = None
                self.password = None
                self.logged_in = False
                if attempt < retries:
                    logging.warning('Login attempt %s of %s failed. Will try again.',
                                    attempt, retries)
                else:
                    logging.critical('Last login attempt %s failed.', attempt)
            attempt += 1
        return self.logged_in

    def set_query_params(self):
        """ Before making requests to NOTES_URL, we need to set the high-level
        search params, like the interest rates and loan status. Query params
        are associated with the session on the server side.
        """
        request_params = {
            'mode': 'search',
            'search_from_rate': '0.04',
            'search_to_rate': '0.29',
            'fil_search_term': ['term_36', 'term_60'],
            'search_loan_term': ['term_36', 'term_60'],
            'opr_min': 0.00,
            'opr_max': 'Any',
            'loan_status': ['loan_status_issued', 'loan_status_late_16_30',
                            'loan_status_current', 'loan_status_late_31_120',
                            'loan_status_ingrace'],
            'remp_min': 1,
            'remp_max': 60,
            'askp_min': 0.00,
            'askp_max': 'Any',
            'credit_score_min': 600,
            'credit_score_max': 850,
            'ytm_min': 0,
            'ytm_max': 'Any',
            'credit_score_trend': ['UP', 'DOWN', 'FLAT'],
            'markup_dis_min': -100,
            'markup_dis_max': 100,
            'ona_min': 25,
            'ona_max': 'Any'
        }
        logging.debug('Setting up the query params..')
        self.open_url(QUERY_PARAMS_URL, request_params)

    def get_page_of_notes(self, sort='opa', sort_dir='asc', offset=0,
                          limit=15, retries=5):
        """ Given a session cookie, get a page of results in a JSON format """
        request_params = {
            'sortBy': sort,
            'dir': sort_dir,
            'newrdnnum': random.randint(10000000, 90000000),  # What is this?
            'startindex': offset,
            'pagesize': limit,
        }
        QUERY_STATUS_KEY = 'result'
        response = self.open_url(NOTES_URL, request_params, verify=True)
        try:
            response_data = response.readline()
            json_data = json.loads(response_data)
            query_status = json_data.get(QUERY_STATUS_KEY)
            if query_status == 'success':
                return json_data
        except Exception as e:
            log_line = 'Error parsing response: %s\n RESP: %s' % (e, response_data)
            logging.warning(log_line)
        else:
            log_line = 'Failed to fetch data. \n RESP: %s' % (json_data)
            logging.warning(log_line)
        # Escalate logging to ERROR if we fail fetching after many retries
        logging.critical('Error fetching page of notes after %d tries.\n > %s',
                         retries, log_line)
        return {}

    def get_note_details(self, record):
        request_params = {
            'loan_id': record.get('loanGUID'),
            'order_id': record.get('orderId'),
            'note_id': record.get('noteId'),
            'showfoliofn': 'true'
        }
        QUERY_STATUS_KEY = 'result'
        response = self.open_url(NOTE_INFO_BASE_URL, request_params, verify=True)
        try:
            response_page = response.read()
            note_parser = NoteHTMLParser(response_page)
            note_info = note_parser.get_info()
            query_status = note_info.get(QUERY_STATUS_KEY)
            if query_status == True:
                return note_info
        except Exception as e:
            log_line = 'get_note_details: Error parsing response: %s\n RESP: %s' % (
                e, response)
            logging.warning(log_line)
        else:
            log_line = 'get_note_details: Failed to parse the response data for record %s' % (
                record)
            logging.warning(log_line)
        return {}

    def get_loan_details(self, record):
        request_params = {
            'loan_id': record.get('loanGUID')
        }
        QUERY_STATUS_KEY = 'result'
        response = self.open_url(LOAN_INFO_BASE_URL, request_params, verify=True)
        try:
            response_page = response.read()
            loan_parser = LoanHTMLParser(response_page)
            loan_info = loan_parser.get_info()
            query_status = loan_info.get(QUERY_STATUS_KEY)
            if query_status == True:
                return loan_info
        except Exception as e:
            log_line = 'get_loan_details: Error parsing response: %s\n RESP: %s' % (
                e, response)
            logging.warning(log_line)
        else:
            log_line = 'get_loan_details: Failed to fetch data for record %s' % (record)
            logging.warning(log_line)
        return {}

    def format_record_detail(self, note_id, note_detail, loan_detail):
        formatted = loan_detail
        formatted['note_id'] = note_id
        formatted.update(note_detail)
        return formatted

    def download_note_details(self, mongo_manager, pagesize=250):
        """ download note details from lc using records stored in mongo_manager """
        logging.info('Fetching records from mongo_manager')
        all_record_ids = mongo_manager.get_records()
        total_record_count = len(all_record_ids)
        logging.info('Fetched %s records', total_record_count)
        self.login()
        logging.info('Start downloading at %s' % str(datetime.now()))
        count = 1
        page_record_details = {}
        start_time = time.time()
        for note_id, record_ids in all_record_ids.iteritems():
            logging.debug('Fetching note %s, loan_id %s, order_id %s',
                          record_ids['noteId'], record_ids['loanGUID'],
                          record_ids['orderId'])
            note_detail = self.get_note_details(record_ids)
            loan_detail = self.get_loan_details(record_ids)
            if not note_detail or not loan_detail:
                logging.warning('Failed to fetch note %s, omitting that',
                                record_ids['noteId'])
                continue
            record_detail = self.format_record_detail(note_id, note_detail,
                                                      loan_detail)
            page_record_details[note_id] = record_detail
            if count % pagesize == 0:
                mongo_manager.add_note_details(page_record_details)
                logging.info('Fetched %s records, %.2f mins elapsed..',
                             count, (time.time() - start_time) / 60)
                page_record_details = {}
                time.sleep(1)
            count = count + 1
        # end loop over records
        logging.info('Fetched %s records; download complete at %s. %.2f min elapsed.',
                     count, str(datetime.now()), (time.time() - start_time) / 60)

    def download_data(self, max_records=250, pagesize=250, mongo_manager=None,
                      download_details=True):
        """ Paginate through enough pages of results to get the desired number
        of records; optionally push note/loan details (or just their ids)
        into mongo_manager along the way.
        """
        RECORD_COUNT_KEY = 'totalRecords'
        RESULT_SET_KEY = 'searchresult'
        LOANS_KEY = 'loans'
        # ensure we're logged in
        self.login()
        # Set the high-level search query params
        self.set_query_params()
        # How many results match the query?
        logging.info('Fetching the total matching record count for the query')
        total_record_count = int(
            self.get_page_of_notes(limit=1).get(RECORD_COUNT_KEY, 0))
        # How many results do we plan to fetch?
        record_limit = min(max_records, total_record_count)
        logging.info('Fetching up to %s of %s matching records',
                     record_limit, total_record_count)
        all_records = {}
        records_set = Set()
        offset = 0
        logging.info('Start downloading at %s' % str(datetime.now()))
        start_time = time.time()
        while offset < record_limit:
            logging.debug('Fetched %s; getting %s more records from the site',
                          len(records_set), pagesize)
            # Set the query arguments and fetch the data in a nice dict
            query_args = {'offset': offset, 'limit': pagesize}
            fetched_data = self.get_page_of_notes(**query_args)
            # Break out early if we're not getting sensible results
            if not fetched_data:
                break
            # Get a list of records from the result
            fetched_records = fetched_data.get(RESULT_SET_KEY, {}).get(LOANS_KEY, [])
            page_record_details = {}
            page_record_ids = {}
            for record in fetched_records:
                note_id = record.get('noteId')
                if note_id in records_set:
                    logging.warning('Looks like we got a duplicate record: %s', record)
                if mongo_manager:
                    if download_details:
                        note_detail = self.get_note_details(record)
                        loan_detail = self.get_loan_details(record)
                        record_detail = self.format_record_detail(
                            note_id, note_detail, loan_detail)
                        page_record_details[note_id] = record_detail
                    else:
                        record_ids = {}
                        record_ids['loan_id'] = record.get('loanGUID')
                        record_ids['order_id'] = record.get('orderId')
                        record_ids['note_id'] = record.get('noteId')
                        page_record_ids[note_id] = record_ids
                else:
                    all_records[note_id] = record
                records_set.add(note_id)
            # end loop over records
            if mongo_manager:
                if download_details:
                    mongo_manager.add_note_detail(page_record_details)
                else:
                    mongo_manager.add_note_ids(page_record_ids)
            offset += pagesize
        # end loop over pages
        logging.info('Fetched %s records; download complete at %s. %.2f min elapsed.',
                     len(records_set), str(datetime.now()),
                     (time.time() - start_time) / 60)
        return all_records

    def download_historical_loan_data(self):
        logging.info('Downloading file from %s..', LOAN_DATA_CSV_URL)
        urlretrieve(LOAN_DATA_CSV_URL, LOAN_DATA_CSV_TMPFILE)
        logging.info('Done writing to %s', LOAN_DATA_CSV_TMPFILE)
        return parse_loan_data_from_file(LOAN_DATA_CSV_TMPFILE)
class HttpPostPublisher(BasePublisher):
    """
    Publish metrics via HTTP POST
    """

    def __init__(self, username, password,
                 url='https://localhost:8443/api/metrics/store',
                 buflen=defaultMetricBufferSize,
                 pubfreq=defaultPublishFrequency):
        super(HttpPostPublisher, self).__init__(buflen, pubfreq)
        self._username = username
        self._password = password
        self._needsAuth = False
        self._authenticated = False
        if self._username:
            self._needsAuth = True
        self._cookieJar = CookieJar()
        self._agent = CookieAgent(Agent(reactor), self._cookieJar)
        self._url = url
        # note: rstrip() strips any trailing '.', 'p', or 'y' characters,
        # not the literal ".py" suffix
        self._agent_suffix = os.path.basename(
            sys.argv[0].rstrip(".py")) if sys.argv[0] else "python"
        reactor.addSystemEventTrigger('before', 'shutdown', self._shutdown)

    def _metrics_published(self, response, llen, remaining=0):
        if response.code != 200:
            if response.code == UNAUTHORIZED:
                self._authenticated = False
                self._cookieJar.clear()
            raise IOError("Expected HTTP 200, but received %d from %s"
                          % (response.code, self._url))
        if self._needsAuth:
            self._authenticated = True
        log.debug("published %d metrics and received response: %s",
                  llen, response.code)
        finished = defer.Deferred()
        response.deliverBody(ResponseReceiver(finished))
        if remaining:
            reactor.callLater(0, self._put, False)
        return finished

    def _response_finished(self, result):
        # The most likely result is the HTTP response from a successful POST,
        # which should be JSON formatted.
        if isinstance(result, str):
            log.debug("response was: %s", json.loads(result))
        # We could be called back because _publish_failed was called before us
        elif isinstance(result, int):
            log.info("queue still contains %d metrics", result)
        # Or something strange could have happened
        else:
            log.warn("Unexpected result: %s", result)

    def _shutdown(self):
        log.debug('shutting down [publishing]')
        if len(self._mq):
            self._make_request()

    def _make_request(self):
        metrics = []
        for x in xrange(HTTP_BATCH):
            if not self._mq:
                break
            metrics.append(self._mq.popleft())
        if not metrics:
            return defer.succeed(None)

        serialized_metrics = json.dumps({"metrics": metrics})
        body_writer = StringProducer(serialized_metrics)

        headers = Headers({
            'User-Agent': ['Zenoss Metric Publisher: %s' % self._agent_suffix],
            'Content-Type': ['application/json']
        })

        if self._needsAuth and not self._authenticated:
            log.info("Adding auth for metric http post %s", self._url)
            headers.addRawHeader(
                'Authorization',
                basic_auth_string_content(self._username, self._password))

        d = self._agent.request('POST', self._url, headers, body_writer)
        d.addCallbacks(self._metrics_published, errback=self._publish_failed,
                       callbackArgs=[len(metrics), len(self._mq)],
                       errbackArgs=[metrics])
        d.addCallbacks(self._response_finished, errback=self._publish_failed,
                       errbackArgs=[metrics])
        return d

    def _put(self, scheduled):
        """
        Push the buffer of metrics to the specified Redis channel
        @param scheduled: scheduled invocation?
        """
        if scheduled:
            self._reschedule_pubtask(scheduled)

        if len(self._mq) == 0:
            return defer.succeed(0)

        log.debug('trying to publish %d metrics', len(self._mq))
        return self._make_request()
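# CookieAgent wraps a Twisted Agent around a cookielib jar, which is why the
# publisher above can drop its session with a plain CookieJar.clear() when
# the server answers 401. A stripped-down sketch of that recovery pattern
# (the URL is a placeholder):
from cookielib import CookieJar
from twisted.internet import reactor
from twisted.web.client import Agent, CookieAgent

jar = CookieJar()
agent = CookieAgent(Agent(reactor), jar)

def check_auth(response):
    if response.code == 401:
        jar.clear()  # stale session cookie; force re-auth on the next request
    return response

d = agent.request('GET', 'https://localhost:8443/api/metrics/store')
d.addCallback(check_auth)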
class SafeBoxClient():
    def __init__(self, server_addr="localhost:8000"):
        self.server_addr = server_addr
        self.client_id = self.ccid = self.pin = None
        self.cookie_jar = CookieJar()
        self.curr_ticket = ""

    # startClient: Initializes the client's remaining attributes;
    # this implies starting a session and eventually client registration.
    def startClient(self, ccid, passwd, pin):
        # checking if client is already registered
        def checkClientReg_cb(success):
            if success == False:
                print "User not registered."
                if pin is None:
                    print "Please provide your Citizen Card for registration"
                    reactor.stop()
                    return
                else:
                    print "Registering user..."
                    return self.handleRegister()
            #pprint(self.cookie_jar.__dict__)
            print "User: "******" logged in."
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)

        # Instantiating ClientIdentity
        def startClientId_cb(key):
            self.client_id = ClientIdentity(self.ccid, passwd, key)
            self.handleStartSession(checkClientReg_cb)

        self.ccid = ccid
        if pin is not None:
            self.pin = pin
        return self.handleGetKey(startClientId_cb)

    # Session, Registry and Authentication related operations
    #
    # handleGetKey: handles getkey operations; this happens as the
    # first step of the startClient operation.
    def handleGetKey(self, method):
        def handleGetKey_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "getkey"))
            return NOT_DONE_YET

        agent = Agent(reactor)
        headers = http_headers.Headers()
        d = agent.request('GET', 'http://localhost:8000/session/?method=getkey',
                          headers, None)
        d.addCallback(handleGetKey_cb)
        return NOT_DONE_YET

    # handleStartSession: handles startsession operations
    def handleStartSession(self, method):
        def procResponse_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def startSession_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            body = _FileProducer(
                StringIO(self.client_id.encryptData(self.client_id.password)),
                dataq)
            headers = http_headers.Headers()
            d = agent.request(
                'PUT',
                'http://localhost:8000/session/?method=startsession&ccid='
                + self.ccid + '&nonceid=' + str(nonceid),
                headers, body)
            d.addCallback(procResponse_cb)
            return NOT_DONE_YET

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(startSession_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        if self.pin != None:
            agent = Agent(reactor)
            body = FileBodyProducer(
                StringIO(self.client_id.pub_key.exportKey('PEM')))
            headers = http_headers.Headers()
            d = agent.request(
                'GET', 'http://localhost:8000/session/?method=getnonce',
                headers, body)
            d.addCallback(getNonce_cb)
            return NOT_DONE_YET

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        body = FileBodyProducer(
            StringIO(self.client_id.encryptData(self.client_id.password)))
        headers = http_headers.Headers()
        d = agent.request(
            'PUT',
            'http://localhost:8000/session/?method=startsession&ccid='
            + self.ccid + '&nonceid=' + str(-1),
            headers, body)
        d.addCallback(procResponse_cb)
        return NOT_DONE_YET

    # handleRegister: Handles the registration process. Also part of the
    # startClient operation.
    def handleRegister(self):
        def checkClientReg_cb(success):
            if success == False:
                print "ERROR: Couldn't register user."
                reactor.stop()
                return
            #pprint(self.cookie_jar.__dict__)
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)
            print "Registration Successful."
            print "User: "******" logged in."

        def procResponse_cb(response, method):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def register_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            dataq.append(self.client_id.encryptData(self.client_id.password))
            # Sending the Certificate and the Sub CA to the server
            if self.pin is None:
                print "ERROR! Check the pin!"
                reactor.stop()
            cert = cc.get_certificate(cc.CERT_LABEL, self.pin)
            #print type(cert.as_pem())
            #print cert.as_pem()
            if cert is None:
                print "ERROR! Check the pin"
                reactor.stop()
            subca = cc.get_certificate(cc.SUBCA_LABEL, self.pin)
            #print type(subca.as_pem())
            #print subca.as_pem()
            if subca is None:
                print "ERROR! Check the pin"
                reactor.stop()
            enc_cert = b64encode(cert.as_pem())
            #print "cert len: ", len(enc_cert)
            dataq.append(enc_cert)
            enc_subca = b64encode(subca.as_pem())
            #print "sub ca len: ", len(enc_subca)
            dataq.append(enc_subca)
            dataq.append(self.client_id.pub_key.exportKey('PEM'))
            ext_key = self.client_id.pub_key.exportKey('PEM')
            if self.pin is None:
                print "ERROR! Check the pin or the CC"
                reactor.stop()
            signed_ext_key = cc.sign(ext_key, cc.KEY_LABEL, self.pin)
            enc_sek = b64encode(signed_ext_key)
            #print "encoded ext key: ", enc_sek
            #print "len encoded: ", len(enc_sek)
            dataq.append(enc_sek)
            body = FileProducer2(dataq)
            headers = http_headers.Headers()
            #print "Password:"******"LEN:", len(self.client_id.encryptData(self.client_id.password))
            d = agent.request(
                'PUT',
                'http://localhost:8000/pboxes/?method=register'
                + '&nonceid=' + str(nonceid),
                headers, body)
            d.addCallback(procResponse_cb, checkClientReg_cb)

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(register_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        agent = Agent(reactor)
        body = FileBodyProducer(
            StringIO(self.client_id.pub_key.exportKey('PEM')))
        headers = http_headers.Headers()
        d = agent.request('GET', 'http://localhost:8000/session/?method=getnonce',
                          headers, body)
        d.addCallback(getNonce_cb)
        return NOT_DONE_YET

    def processCookie(self, uri):
        dci = number.long_to_bytes(
            number.bytes_to_long(self.curr_ticket) + long("1", base=10))
        #print "incremented ticket", number.bytes_to_long(dci)
        self.curr_ticket = dci
        sci = self.client_id.signData(str(dci))
        enc = self.client_id.encryptData(sci)
        for cookie in self.cookie_jar:
            cookie.value = enc
            cookie.path = uri
        self.cookie_jar.clear()
        self.cookie_jar.set_cookie(cookie)
        return dci
        #print cookie

    # List Operations
    #
    # handleList: handles every list command
    def handleList_cb(self, response):
        defer = Deferred()
        response.deliverBody(DataPrinter(defer, "list"))
        return NOT_DONE_YET

    def handleListPboxes(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/pboxes")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        #print "hashed:", self.client_id.genHashArgs(args, salt)
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/pboxes/?method=list&ccid=' + self.ccid,
            headers, body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    def handleListFiles(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/files")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/files/?method=list&ccid=' + self.ccid,
            headers, body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    def handleListShares(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/shares/?method=list&ccid=' + self.ccid,
            headers, body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    # Get Operations
    #
    # handleGetMData: Handles get pbox metadata operations.
    def handleGetMData(self, data):
        #data = (method, tgtccid)
        pprint(data)

        def handleGetMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/pboxes")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/pboxes/?method=get_mdata&ccid='
            + self.ccid + "&tgtccid=" + data[1],
            headers, body)
        d.addCallback(handleGetMData_cb)
        return NOT_DONE_YET

    # handleGetFileMData: Handles get file metadata operations.
    def handleGetFileMData(self, data):
        #data = (method, fileid)
        def handleGetFileMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/files")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/files/?method=get_mdata&ccid='
            + self.ccid + "&fileid=" + data[1],
            headers, body)
        d.addCallback(handleGetFileMData_cb)
        return NOT_DONE_YET

    # handleGetShareMData: Handles get share metadata operations.
    def handleGetShareMData(self, data):
        #data = (method, fileid)
        def handleGetShareMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/shares/?method=get_mdata&ccid='
            + self.ccid + "&fileid=" + data[1],
            headers, body)
        d.addCallback(handleGetShareMData_cb)
        return NOT_DONE_YET

    # handleGet: handles get file
    #def handleGet(self, line):

    def printResult_cb(self, data):
        pprint(data)  #TODO: Format this!
        return NOT_DONE_YET

    # for info requests
    def handleGetInfo(self, s):
        if s[1].lower() == "pboxinfo":
            return self.handleGetMData((self.printResult_cb, s[2].lower()))
        elif s[1].lower() == "fileinfo":
            return self.handleGetFileMData((self.printResult_cb, s[2].lower()))
        elif s[1].lower() == "shareinfo":
            return self.handleGetShareMData((self.printResult_cb, s[2].lower()))

    # Decrypt and write the file
    def writeFile_cb(self, ignore, s):
        # we should implement http error code checking
        fileId = s[2]
        enc_file = open(fileId, "r")
        if len(s) == 4:
            dec_file = open(s[3], "w")
        else:
            dec_file = open(fileId + "_decrypted", "w")
        enc_key = enc_file.read(IV_KEY_SIZE_B64)
        # print "debugging: iv key writefile"
        # print enc_key
        print "Decrypting file..."
        key = self.client_id.decryptData(enc_key)
        enc_iv = enc_file.read(IV_KEY_SIZE_B64)
        #print enc_iv
        iv = self.client_id.decryptData(enc_iv)
        print iv
        self.client_id.decryptFileSym(enc_file, dec_file, key, iv)
        print "File written."

    # for get file
    def handleGetFile(self, s):
        def handleGetFile_cb(response, f):
            finished = Deferred()
            finished.addCallback(self.writeFile_cb, s)
            cons = FileConsumer(f)
            response.deliverBody(FileDownload(finished, cons))
            print "Downloading file..."
            return finished

        fileId = s[2]
        args = ("getfile", str(self.ccid), str(fileId))
        salt = self.processCookie("/files")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/files/?method=getfile&ccid='
            + self.ccid + '&fileid=' + str(fileId),
            headers, body)
        f = open(fileId, "w")
        d.addCallback(handleGetFile_cb, f)
        return NOT_DONE_YET

    # for get shared
    def handleGetShared(self, s):
        def handleGetShared_cb(response, f):
            finished = Deferred()
            finished.addCallback(self.writeFile_cb, s)
            cons = FileConsumer(f)
            response.deliverBody(FileDownload(finished, cons))
            print "Downloading file..."
            return finished

        fileId = s[2]
        args = ("getshared", str(self.ccid), str(fileId))
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/shares/?method=getshared&ccid='
            + self.ccid + '&fileid=' + fileId,
            headers, body)
        f = open(fileId, "w")
        d.addCallback(handleGetShared_cb, f)
        return NOT_DONE_YET

    # Put Operations
    #
    # printPutReply_cb: prints put and update responses
    def printPutReply_cb(self, response):
        print "Done."
        defer = Deferred()
        response.deliverBody(DataPrinter(defer, "getmdata"))
        return NOT_DONE_YET

    # handlePutFile: handles file upload
    def handlePutFile(self, line):
        print "Encrypting file..."
        s = line.split()
        file = open(s[2], 'r')
        enc_file = open("enc_fileout", 'w')
        crd = self.client_id.encryptFileSym(file, enc_file)
        args = ("putfile", str(self.ccid), os.path.basename(s[2]))
        salt = self.processCookie("/files")
        dataq = []
        dataq.append(self.client_id.genHashArgs(args, salt))
        dataq.append(self.client_id.encryptData(crd[0], self.client_id.pub_key))
        dataq.append(self.client_id.encryptData(crd[1]))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        #print crd[1]
        # print "debugging:key, iv putfile"
        # print dataq[1]
        # print len(dataq[1])
        # print dataq[2]
        # print len(dataq[2])
        print "Uploading file..."
        enc_file = open("enc_fileout", 'r')
        body = _FileProducer(enc_file, dataq)
        headers = http_headers.Headers()
        d = agent.request(
            'PUT',
            'http://localhost:8000/files/?method=putfile&ccid='
            + self.ccid + "&name=" + os.path.basename(s[2]),
            headers, body)
        d.addCallback(self.printPutReply_cb)
        return NOT_DONE_YET

    # Update Operations
    #
    # handles update commands
    def handleUpdate(self, s):
        def encryptFile_cb(data):
            #TODO: Some error checking here.
            def updateFile_cb(iv):
                #data = (key,)
                print "Updating file..."
                args = ("updatefile", str(self.ccid), os.path.basename(s[3]), s[2])
                salt = self.processCookie("/files")
                dataq = []
                dataq.append(self.client_id.genHashArgs(args, salt))
                dataq.append(iv)
                # print "debugging:ticket, iv updatefile"
                # print dataq[0]
                # print dataq[1]
                # print len(dataq[1])
                agent = CookieAgent(Agent(reactor), self.cookie_jar)
                print "Uploading file..."
                enc_file = open("enc_fileout", 'r')
                body = _FileProducer(enc_file, dataq)
                headers = http_headers.Headers()
                d = agent.request(
                    'POST',
                    'http://localhost:8000/files/?method=updatefile&ccid='
                    + self.ccid + "&name=" + os.path.basename(s[3])
                    + "&fileid=" + s[2],
                    headers, body)
                d.addCallback(self.printPutReply_cb)
                return NOT_DONE_YET

            def updateShared_cb(iv):
                print "Updating file..."
                args = ("updateshared", str(self.ccid), os.path.basename(s[3]), s[2])
                salt = self.processCookie("/shares")
                dataq = []
                dataq.append(self.client_id.genHashArgs(args, salt))
                dataq.append(iv)
                # print "debugging:ticket, iv updatefile"
                # print dataq[0]
                # print dataq[1]
                # print len(dataq[1])
                print "Uploading file..."
                agent = CookieAgent(Agent(reactor), self.cookie_jar)
                enc_file = open("enc_fileout", 'r')
                body = _FileProducer(enc_file, dataq)
                headers = http_headers.Headers()
                d = agent.request(
                    'POST',
                    'http://localhost:8000/shares/?method=updateshared&ccid='
                    + self.ccid + "&name=" + os.path.basename(s[3])
                    + "&fileid=" + s[2],
                    headers, body)
                d.addCallback(self.printPutReply_cb)
                return NOT_DONE_YET

            if isinstance(data, basestring):
                print data
                return
            print "Encrypting file..."
            #print data["data"]["SymKey"]
            enc_key = data["data"]["SymKey"]
            key = self.client_id.decryptData(enc_key, self.client_id.priv_key)
            #print len(key)
            file = open(s[3], 'r')
            enc_file = open("enc_fileout", 'w')
            crd = self.client_id.encryptFileSym(file, enc_file, key=key)
            new_iv = self.client_id.encryptData(crd[1])
            if s[1] == "shared":
                return updateShared_cb(new_iv)
            return updateFile_cb(new_iv)

        hsmd_data = (encryptFile_cb, s[2])
        if s[1] == "file":
            return self.handleGetFileMData(hsmd_data)
        return self.handleGetShareMData(hsmd_data)

    def handleUpdateSharePerm(self, s):
        args = ("updateshareperm", str(self.ccid), s[3], s[2], s[4])
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'POST',
            'http://localhost:8000/shares/?method=updateshareperm&ccid='
            + self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2]
            + "&writeable=" + s[4],
            headers, body)
        d.addCallback(self.printPutReply_cb)
        return NOT_DONE_YET

    # Delete Operations
    #
    # handleDelete: handles delete commands
    def handleDelete(self, line):
        def printDeleteReply_cb(data):
            if not data:
                print "Done."
            else:
                print "Done."

        def deleteFile_cb():
            args = ("delete", str(self.ccid), s[2])
            salt = self.processCookie("/files")
            body = FileBodyProducer(
                StringIO(self.client_id.genHashArgs(args, salt)))
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            headers = http_headers.Headers()
            d = agent.request(
                'DELETE',
                'http://localhost:8000/files/?method=delete&ccid='
                + self.ccid + "&fileid=" + s[2],
                headers, body)
            d.addCallback(printDeleteReply_cb)

        def deleteShare_cb():
            args = ("delete", str(self.ccid), s[2], s[3])
            salt = self.processCookie("/shares")
            body = FileBodyProducer(
                StringIO(self.client_id.genHashArgs(args, salt)))
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            headers = http_headers.Headers()
            d = agent.request(
                'DELETE',
                'http://localhost:8000/shares/?method=delete&ccid='
                + self.ccid + "&fileid=" + s[2] + "&rccid=" + s[3],
                headers, body)
            d.addCallback(printDeleteReply_cb)

        s = line.split()
        if len(s) == 4:
            return deleteShare_cb()
        if len(s) == 3:
            return deleteFile_cb()
        print "Error: invalid arguments!\n"
        print "Usage: delete <file|share> <fileid> <None|rccid>"
        return

    # Share Operation
    #
    def handleShare(self, line):
        def getFKey_cb(data):
            enc_key = data["data"]["SymKey"]

            def getDstKey_cb(data):
                dstkey = data["data"]["PubKey"]
                print "pubkey" + dstkey

                def shareFile_cb():
                    args = ("delete", str(self.ccid), s[3], s[2])
                    salt = self.processCookie("/shares")
                    dataq = []
                    dataq.append(self.client_id.genHashArgs(args, salt))
                    dataq.append(enc_sym_key)
                    print "Uploading symkey..."
                    agent = CookieAgent(Agent(reactor), self.cookie_jar)
                    body = _FileProducer(StringIO(""), dataq)
                    headers = http_headers.Headers()
                    d = agent.request(
                        'PUT',
                        'http://localhost:8000/shares/?method=sharefile&ccid='
                        + self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2],
                        headers, body)
                    d.addCallback(self.printPutReply_cb)
                    return d

                #enc_key = data["data"]["SymKey"]
                sym_key = self.client_id.decryptData(enc_key, self.client_id.priv_key)
                dstkey = RSA.importKey(dstkey)
                enc_sym_key = self.client_id.encryptData(sym_key, dstkey)
                return shareFile_cb()

            hfmd_data = (getDstKey_cb, s[3].lower())
            return self.handleGetMData(hfmd_data)

        s = line.split()
        if len(s) == 4:
            hmd_data = (getFKey_cb, s[2].lower())
            return self.handleGetFileMData(hmd_data)
        else:
            if s[1].lower() != "file":
                print "Error: invalid arguments!\n"
                print "Usage: share file <fileid> <recipient's ccid>"
            return
class ControlPlaneClient(object): """ """ def __init__(self, user, password, host=None, port=None): """ """ self._cj = CookieJar() self._opener = urllib2.build_opener( urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(self._cj) ) self._server = { "host": host if host else _DEFAULT_HOST, "port": port if port else _DEFAULT_PORT, } self._creds = {"username": user, "password": password} self._netloc = "%(host)s:%(port)s" % self._server def queryServices(self, name=None, tags=None): """ Returns a sequence of ServiceDefinition objects that match the given requirements. """ query = {} if name: namepat = fnmatch.translate(name) # controlplane regex accepts \z, not \Z. namepat = namepat.replace("\\Z", "\\z") query["name"] = namepat if tags: if isinstance(tags, (str, unicode)): tags = [tags] query["tags"] = ','.join(tags) response = self._dorequest("/services", query=query) body = ''.join(response.readlines()) response.close() decoded = ServiceJsonDecoder().decode(body) if decoded is None: decoded = [] return decoded def getService(self, serviceId, default=None): """ Returns the ServiceDefinition object for the given service. """ response = self._dorequest("/services/%s" % serviceId) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def updateService(self, service): """ Updates the definition/state of a service. :param ServiceDefinition service: The modified definition """ body = ServiceJsonEncoder().encode(service) response = self._dorequest( service.resourceId, method="PUT", data=body ) body = ''.join(response.readlines()) response.close() def queryServiceInstances(self, serviceId): """ Returns a sequence of ServiceInstance objects. """ response = self._dorequest("/services/%s/running" % serviceId) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def getInstance(self, serviceId, instanceId, default=None): """ Returns the requested ServiceInstance object. """ response = self._dorequest( "/services/%s/running/%s" % (serviceId, instanceId) ) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def getServiceLog(self, serviceId, start=0, end=None): """ """ response = self._dorequest("/services/%s/logs" % serviceId) body = ''.join(response.readlines()) response.close() log = json.loads(body) return log["Detail"] def getInstanceLog(self, serviceId, instanceId, start=0, end=None): """ """ response = self._dorequest( "/services/%s/%s/logs" % (serviceId, instanceId) ) body = ''.join(response.readlines()) response.close() log = json.loads(body) return str(log["Detail"]) def killInstance(self, hostId, instanceId): """ """ response = self._dorequest( "/hosts/%s/%s" % (hostId, instanceId), method="DELETE" ) response.close() def _makeRequest(self, uri, method=None, data=None, query=None): query = urllib.urlencode(query) if query else "" url = urlunparse(("http", self._netloc, uri, "", query, "")) args = {} if method: args["method"] = method if data: args["data"] = data args["headers"] = {"Content-Type": "application/json"} return _Request(url, **args) def _login(self): # Clear the cookie jar before logging in. 
        self._cj.clear()
        encodedbody = json.dumps(self._creds)
        request = self._makeRequest("/login", data=encodedbody)
        response = self._opener.open(request)
        response.close()

    def _dorequest(self, uri, method=None, data=None, query=None):
        request = self._makeRequest(
            uri, method=method, data=data, query=query)
        # Try to perform the request up to five times; a 401 means the
        # session expired, so log in again and retry.
        for trycount in range(5):
            try:
                return self._opener.open(request)
            except urllib2.HTTPError as ex:
                if ex.getcode() != 401:
                    raise
                self._login()
        # Five 401 responses in a row: re-raise the last one. A bare
        # `raise` after the loop would have no active exception, so keep
        # the reference from the except clause instead.
        raise ex
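# Hedged usage sketch for ControlPlaneClient (not part of the original
# module): the host, port, credentials and name pattern below are
# hypothetical. The point is that callers never log in explicitly --
# _dorequest() calls _login() and retries whenever the server answers 401.
if __name__ == "__main__":
    client = ControlPlaneClient("admin", "secret",
                                host="cp.example.com", port=443)
    for svc in client.queryServices(name="zope*", tags=["daemon"]):
        print svc  # ServiceDefinition objects decoded from the response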
ppft = re.search('<input type="hidden" name="PPFT" id="[^"]+" value="([^"]+)"',
                 html).group(1)
action = re.search("var srf_uPost='([^']+)'", html).group(1)
postData = dict(re.findall("var srf_s([^=]+)='([^']+)';", html))
html = urllib2.urlopen(action, urllib.urlencode({
    'PPFT': ppft,
    'login': email,
    'passwd': password,
}).encode()).read().decode('utf-8')
if html.find('replace("http://mail.live.com/default.aspx?rru=inbox")') == -1:
    print "Can't log into HotMail with: %s - %s" % (email, password)
else:
    # cookie.clear(domain='.mail.live.com', path='/', name='KVC')
    cookie.clear(domain='.live.com', path='/', name='WLSSC')
    print 'Logged in. Redirecting to email inbox...',
    nexturl = 'http://mail.live.com/default.aspx?rru=inbox'
    inboxURL = ''
    while nexturl:
        try:
            print '.',
            # print "\tRedirecting to %s" % nexturl
            req = urllib2.urlopen(nexturl)
            html = req.read().decode('utf-8')
            inboxURL = req.url
            nexturl = False
        except urllib2.HTTPError as e:
            print e.read()
            nexturl = urlparse.urljoin(nexturl, e.headers['location'])
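# For reference: cookielib.CookieJar.clear(), used above to drop the
# WLSSC login cookie, is strict about its arguments. A standalone sketch
# (the domain and cookie names are just examples):
import cookielib

jar = cookielib.CookieJar()
try:
    # All three arguments remove one specific cookie; KeyError if absent.
    jar.clear(domain='.live.com', path='/', name='WLSSC')
except KeyError:
    pass
# jar.clear(name='WLSSC') would raise ValueError: if name is given,
# domain and path must be given too. With no arguments, the jar is emptied.
jar.clear()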
class HttpPostPublisher(BasePublisher): """ Publish metrics via HTTP POST """ def __init__(self, username, password, url='https://localhost:8443/api/metrics/store', buflen=defaultMetricBufferSize, pubfreq=defaultPublishFrequency): super(HttpPostPublisher, self).__init__(buflen, pubfreq) self._username = username self._password = password self._needsAuth = False self._authenticated = False if self._username: self._needsAuth = True self._cookieJar = CookieJar() self._agent = CookieAgent(Agent(reactor), self._cookieJar) self._url = url self._agent_suffix = os.path.basename(sys.argv[0].rstrip(".py")) if sys.argv[0] else "python" reactor.addSystemEventTrigger('before', 'shutdown', self._shutdown) def _metrics_published(self, response, llen, remaining=0): if response.code != 200: if response.code == UNAUTHORIZED: self._authenticated = False self._cookieJar.clear() raise IOError("Expected HTTP 200, but received %d from %s" % (response.code, self._url)) if self._needsAuth: self._authenticated = True log.debug("published %d metrics and received response: %s", llen, response.code) finished = defer.Deferred() response.deliverBody(ResponseReceiver(finished)) if remaining: reactor.callLater(0, self._put, False) return finished def _response_finished(self, result): # The most likely result is the HTTP response from a successful POST, # which should be JSON formatted. if isinstance(result, str): log.debug("response was: %s", json.loads(result)) # We could be called back because _publish_failed was called before us elif isinstance(result, int): log.info("queue still contains %d metrics", result) # Or something strange could have happend else: log.warn("Unexpected result: %s", result) def _shutdown(self): log.debug('shutting down [publishing]') if len(self._mq): self._make_request() def _make_request(self): metrics = [] for x in xrange(HTTP_BATCH): if not self._mq: break metrics.append(self._mq.popleft()) if not metrics: return defer.succeed(None) serialized_metrics = json.dumps({"metrics": metrics}) body_writer = StringProducer(serialized_metrics) headers = Headers({ 'User-Agent': ['Zenoss Metric Publisher: %s' % self._agent_suffix], 'Content-Type': ['application/json']}) if self._needsAuth and not self._authenticated: log.info("Adding auth for metric http post %s", self._url) headers.addRawHeader('Authorization', basic_auth_string_content(self._username, self._password)) d = self._agent.request( 'POST', self._url, headers, body_writer) d.addCallbacks(self._metrics_published, errback=self._publish_failed, callbackArgs = [len(metrics), len(self._mq)], errbackArgs = [metrics]) d.addCallbacks(self._response_finished, errback=self._publish_failed, errbackArgs = [metrics]) return d def _put(self, scheduled): """ Push the buffer of metrics to the specified Redis channel @param scheduled: scheduled invocation? """ if scheduled: self._reschedule_pubtask(scheduled) if len(self._mq) == 0: return defer.succeed(0) log.debug('trying to publish %d metrics', len(self._mq)) return self._make_request()
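# StringProducer is referenced above but not defined here. A minimal
# IBodyProducer implementation along these lines (essentially the one from
# the Twisted documentation) would satisfy the agent.request() call:
from zope.interface import implements
from twisted.internet import defer
from twisted.web.iweb import IBodyProducer

class StringProducer(object):
    """Write an in-memory string as a twisted.web request body."""
    implements(IBodyProducer)

    def __init__(self, body):
        self.body = body
        self.length = len(body)

    def startProducing(self, consumer):
        # The whole body is already in memory, so write it in one shot.
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        pass

    def stopProducing(self):
        pass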
class Bugz:
    """ Converts sane method calls to Bugzilla HTTP requests.

    @ivar base: base url of bugzilla.
    @ivar user: username for authenticated operations.
    @ivar password: password for authenticated operations
    @ivar cookiejar: for authenticated sessions so we only auth once.
    @ivar forget: forget user/password after session.
    @ivar authenticated: is this session authenticated already
    """

    def __init__(self, base, user=None, password=None, forget=False,
                 skip_auth=False, httpuser=None, httppassword=None):
        """
        {user} and {password} will be prompted if an action needs them
        and they are not supplied.

        if {forget} is set, the login cookie will be destroyed on quit.

        @param base: base url of the bugzilla
        @type base: string
        @keyword user: username for authenticated actions.
        @type user: string
        @keyword password: password for authenticated actions.
        @type password: string
        @keyword forget: forget login session after termination.
        @type forget: bool
        @keyword skip_auth: do not authenticate
        @type skip_auth: bool
        """
        self.base = base
        scheme, self.host, self.path, query, frag = urlsplit(self.base)
        self.authenticated = False
        self.forget = forget

        if not self.forget:
            try:
                cookie_file = os.path.join(os.environ['HOME'], COOKIE_FILE)
                self.cookiejar = LWPCookieJar(cookie_file)
            except KeyError:
                # $HOME is unset, so there is nowhere to persist cookies.
                # Note CookieJar() takes a policy, not a filename.
                self.warn('Unable to save session cookies in %s' % COOKIE_FILE)
                self.cookiejar = CookieJar()
        else:
            # {forget} is set: destroy any previously saved login cookie,
            # then work from a throw-away in-memory jar.
            try:
                cookie_file = os.path.join(os.environ['HOME'], COOKIE_FILE)
                saved = LWPCookieJar(cookie_file)
                saved.load()
                saved.clear()
                saved.save()
                os.chmod(saved.filename, 0600)
            except (KeyError, IOError):
                pass
            self.cookiejar = CookieJar()

        self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
        self.user = user
        self.password = password
        self.httpuser = httpuser
        self.httppassword = httppassword
        self.skip_auth = skip_auth

    def log(self, status_msg):
        """Default logging handler. Expected to be overridden by the
        UI implementing subclass.

        @param status_msg: status message to print
        @type status_msg: string
        """
        return

    def warn(self, warn_msg):
        """Default logging handler. Expected to be overridden by the
        UI implementing subclass.

        @param warn_msg: warning message to print
        @type warn_msg: string
        """
        return

    def get_input(self, prompt):
        """Default input handler. Expected to be overridden by the UI
        implementing subclass.

        @param prompt: Prompt message
        @type prompt: string
        """
        return ''

    def auth(self):
        """Authenticate a session.
""" # check if we need to authenticate if self.authenticated: return # try seeing if we really need to request login if not self.forget: try: self.cookiejar.load() except IOError: pass req_url = urljoin(self.base, config.urls['auth']) req_url += '?GoAheadAndLogIn=1' req = Request(req_url, None, config.headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) re_request_login = re.compile(r'<title>.*Log in to .*</title>') if not re_request_login.search(resp.read()): self.log('Already logged in.') self.authenticated = True return # prompt for username if we were not supplied with it if not self.user: self.log('No username given.') self.user = self.get_input('Username: '******'No password given.') self.password = getpass.getpass() # perform login qparams = config.params['auth'].copy() qparams['Bugzilla_login'] = self.user qparams['Bugzilla_password'] = self.password if not self.forget: qparams['Bugzilla_remember'] = 'on' req_url = urljoin(self.base, config.urls['auth']) req = Request(req_url, urlencode(qparams), config.headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) if resp.info().has_key('Set-Cookie'): self.authenticated = True if not self.forget: self.cookiejar.save() os.chmod(self.cookiejar.filename, 0600) return True else: raise RuntimeError("Failed to login") def extractResults(self, resp): # parse the results into dicts. results = [] columns = [] rows = [] for r in csv.reader(resp): rows.append(r) for field in rows[0]: if config.choices['column_alias'].has_key(field): columns.append(config.choices['column_alias'][field]) else: self.log('Unknown field: ' + field) columns.append(field) for row in rows[1:]: if "Missing Search" in row[0]: self.log('Bugzilla error (Missing search found)') return None fields = {} for i in range(min(len(row), len(columns))): fields[columns[i]] = row[i] results.append(fields) return results def search(self, query, comments=False, order='number', assigned_to=None, reporter=None, cc=None, commenter=None, whiteboard=None, keywords=None, status=[], severity=[], priority=[], product=[], component=[]): """Search bugzilla for a bug. @param query: query string to search in title or {comments}. @type query: string @param order: what order to returns bugs in. @type order: string @keyword assigned_to: email address which the bug is assigned to. @type assigned_to: string @keyword reporter: email address matching the bug reporter. @type reporter: string @keyword cc: email that is contained in the CC list @type cc: string @keyword commenter: email of a commenter. @type commenter: string @keyword whiteboard: string to search in status whiteboard (gentoo?) @type whiteboard: string @keyword keywords: keyword to search for @type keywords: string @keyword status: bug status to match. default is ['NEW', 'ASSIGNED', 'REOPENED']. @type status: list @keyword severity: severity to match, empty means all. @type severity: list @keyword priority: priority levels to patch, empty means all. @type priority: list @keyword comments: search comments instead of just bug title. @type comments: bool @keyword product: search within products. empty means all. @type product: list @keyword component: search within components. empty means all. 
@type component: list @return: list of bugs, each bug represented as a dict @rtype: list of dicts """ if not self.authenticated and not self.skip_auth: self.auth() qparams = config.params['list'].copy() qparams['value0-0-0'] = query if comments: qparams['type0-0-1'] = qparams['type0-0-0'] qparams['value0-0-1'] = query qparams['order'] = config.choices['order'].get(order, 'Bug Number') qparams['bug_severity'] = severity or [] qparams['priority'] = priority or [] if status is None: # NEW, ASSIGNED and REOPENED is obsolete as of bugzilla 3.x and has # been removed from bugs.gentoo.org on 2011/05/01 qparams['bug_status'] = [ 'NEW', 'ASSIGNED', 'REOPENED', 'UNCONFIRMED', 'CONFIRMED', 'IN_PROGRESS' ] elif [s.upper() for s in status] == ['ALL']: qparams['bug_status'] = config.choices['status'] else: qparams['bug_status'] = [s.upper() for s in status] qparams['product'] = product or '' qparams['component'] = component or '' qparams['status_whiteboard'] = whiteboard or '' qparams['keywords'] = keywords or '' # hoops to jump through for emails, since there are # only two fields, we have to figure out what combinations # to use if all three are set. unique = list(set([assigned_to, cc, reporter, commenter])) unique = [u for u in unique if u] if len(unique) < 3: for i in range(len(unique)): e = unique[i] n = i + 1 qparams['email%d' % n] = e qparams['emailassigned_to%d' % n] = int(e == assigned_to) qparams['emailreporter%d' % n] = int(e == reporter) qparams['emailcc%d' % n] = int(e == cc) qparams['emaillongdesc%d' % n] = int(e == commenter) else: raise AssertionError('Cannot set assigned_to, cc, and ' 'reporter in the same query') req_params = urlencode(qparams, True) req_url = urljoin(self.base, config.urls['list']) req_url += '?' + req_params req = Request(req_url, None, config.headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) return self.extractResults(resp) def namedcmd(self, cmd): """Run command stored in Bugzilla by name. @return: Result from the stored command. @rtype: list of dicts """ if not self.authenticated and not self.skip_auth: self.auth() qparams = config.params['namedcmd'].copy() # Is there a better way of getting a command with a space in its name # to be encoded as foo%20bar instead of foo+bar or foo%2520bar? qparams['namedcmd'] = quote(cmd) req_params = urlencode(qparams, True) req_params = req_params.replace('%25', '%') req_url = urljoin(self.base, config.urls['list']) req_url += '?' + req_params req = Request(req_url, None, config.headers) if self.user and self.password: base64string = base64.encodestring('%s:%s' % (self.user, self.password))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) return self.extractResults(resp) def get(self, bugid): """Get an ElementTree representation of a bug. @param bugid: bug id @type bugid: int @rtype: ElementTree """ if not self.authenticated and not self.skip_auth: self.auth() qparams = config.params['show'].copy() qparams['id'] = bugid req_params = urlencode(qparams, True) req_url = urljoin(self.base, config.urls['show']) req_url += '?' 
+ req_params req = Request(req_url, None, config.headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) data = resp.read() # Get rid of control characters. data = re.sub('[\x00-\x08\x0e-\x1f\x0b\x0c]', '', data) fd = StringIO(data) # workaround for ill-defined XML templates in bugzilla 2.20.2 (major_version, minor_version) = \ (sys.version_info[0], sys.version_info[1]) if major_version > 2 or \ (major_version == 2 and minor_version >= 7): # If this is 2.7 or greater, then XMLTreeBuilder # does what we want. parser = ElementTree.XMLParser() else: # Running under Python 2.6, so we need to use our # subclass of XMLTreeBuilder instead. parser = ForcedEncodingXMLTreeBuilder(encoding='utf-8') etree = ElementTree.parse(fd, parser) bug = etree.find('.//bug') if bug is not None and bug.attrib.has_key('error'): return None else: return etree def modify(self, bugid, title=None, comment=None, url=None, status=None, resolution=None, assigned_to=None, duplicate=0, priority=None, severity=None, add_cc=[], remove_cc=[], add_dependson=[], remove_dependson=[], add_blocked=[], remove_blocked=[], whiteboard=None, keywords=None, component=None): """Modify an existing bug @param bugid: bug id @type bugid: int @keyword title: new title for bug @type title: string @keyword comment: comment to add @type comment: string @keyword url: new url @type url: string @keyword status: new status (note, if you are changing it to RESOLVED, you need to set {resolution} as well. @type status: string @keyword resolution: new resolution (if status=RESOLVED) @type resolution: string @keyword assigned_to: email (needs to exist in bugzilla) @type assigned_to: string @keyword duplicate: bug id to duplicate against (if resolution = DUPLICATE) @type duplicate: int @keyword priority: new priority for bug @type priority: string @keyword severity: new severity for bug @type severity: string @keyword add_cc: list of emails to add to the cc list @type add_cc: list of strings @keyword remove_cc: list of emails to remove from cc list @type remove_cc: list of string. @keyword add_dependson: list of bug ids to add to the depend list @type add_dependson: list of strings @keyword remove_dependson: list of bug ids to remove from depend list @type remove_dependson: list of strings @keyword add_blocked: list of bug ids to add to the blocked list @type add_blocked: list of strings @keyword remove_blocked: list of bug ids to remove from blocked list @type remove_blocked: list of strings @keyword whiteboard: set status whiteboard @type whiteboard: string @keyword keywords: set keywords @type keywords: string @keyword component: set component @type component: string @return: list of fields modified. @rtype: list of strings """ if not self.authenticated and not self.skip_auth: self.auth() buginfo = Bugz.get(self, bugid) if not buginfo: return False modified = [] qparams = config.params['modify'].copy() qparams['id'] = bugid # NOTE: knob has been removed in bugzilla 4 and 3? 
qparams['knob'] = 'none' # copy existing fields FIELDS = ('bug_file_loc', 'bug_severity', 'short_desc', 'bug_status', 'status_whiteboard', 'keywords', 'resolution', 'op_sys', 'priority', 'version', 'target_milestone', 'assigned_to', 'rep_platform', 'product', 'component', 'token') FIELDS_MULTI = ('blocked', 'dependson') for field in FIELDS: try: qparams[field] = buginfo.find('.//%s' % field).text if qparams[field] is None: del qparams[field] except: pass for field in FIELDS_MULTI: qparams[field] = [ d.text for d in buginfo.findall('.//%s' % field) if d is not None and d.text is not None ] # set 'knob' if we are change the status/resolution # or trying to reassign bug. if status: status = status.upper() if resolution: resolution = resolution.upper() if status and status != qparams['bug_status']: # Bugzilla >= 3.x qparams['bug_status'] = status if status == 'RESOLVED': qparams['knob'] = 'resolve' if resolution: qparams['resolution'] = resolution else: qparams['resolution'] = 'FIXED' modified.append(('status', status)) modified.append(('resolution', qparams['resolution'])) elif status == 'ASSIGNED' or status == 'IN_PROGRESS': qparams['knob'] = 'accept' modified.append(('status', status)) elif status == 'REOPENED': qparams['knob'] = 'reopen' modified.append(('status', status)) elif status == 'VERIFIED': qparams['knob'] = 'verified' modified.append(('status', status)) elif status == 'CLOSED': qparams['knob'] = 'closed' modified.append(('status', status)) elif duplicate: # Bugzilla >= 3.x qparams['bug_status'] = "RESOLVED" qparams['resolution'] = "DUPLICATE" qparams['knob'] = 'duplicate' qparams['dup_id'] = duplicate modified.append(('status', 'RESOLVED')) modified.append(('resolution', 'DUPLICATE')) elif assigned_to: qparams['knob'] = 'reassign' qparams['assigned_to'] = assigned_to modified.append(('assigned_to', assigned_to)) # setup modification of other bits if comment: qparams['comment'] = comment modified.append(('comment', ellipsis(comment, 60))) if title: qparams['short_desc'] = title or '' modified.append(('title', title)) if url is not None: qparams['bug_file_loc'] = url modified.append(('url', url)) if severity is not None: qparams['bug_severity'] = severity modified.append(('severity', severity)) if priority is not None: qparams['priority'] = priority modified.append(('priority', priority)) # cc manipulation if add_cc is not None: qparams['newcc'] = ', '.join(add_cc) modified.append(('newcc', qparams['newcc'])) if remove_cc is not None: qparams['cc'] = remove_cc qparams['removecc'] = 'on' modified.append(('cc', remove_cc)) # bug depend/blocked manipulation changed_dependson = False changed_blocked = False if remove_dependson: for bug_id in remove_dependson: qparams['dependson'].remove(str(bug_id)) changed_dependson = True if remove_blocked: for bug_id in remove_blocked: qparams['blocked'].remove(str(bug_id)) changed_blocked = True if add_dependson: for bug_id in add_dependson: qparams['dependson'].append(str(bug_id)) changed_dependson = True if add_blocked: for bug_id in add_blocked: qparams['blocked'].append(str(bug_id)) changed_blocked = True qparams['dependson'] = ','.join(qparams['dependson']) qparams['blocked'] = ','.join(qparams['blocked']) if changed_dependson: modified.append(('dependson', qparams['dependson'])) if changed_blocked: modified.append(('blocked', qparams['blocked'])) if whiteboard is not None: qparams['status_whiteboard'] = whiteboard modified.append(('status_whiteboard', whiteboard)) if keywords is not None: qparams['keywords'] = keywords 
modified.append(('keywords', keywords)) if component is not None: qparams['component'] = component modified.append(('component', component)) req_params = urlencode(qparams, True) req_url = urljoin(self.base, config.urls['modify']) req = Request(req_url, req_params, config.headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) try: resp = self.opener.open(req) re_error = re.compile(r'id="error_msg".*>([^<]+)<') error = re_error.search(resp.read()) if error: print error.group(1) return [] return modified except: return [] def attachment(self, attachid): """Get an attachment by attachment_id @param attachid: attachment id @type attachid: int @return: dict with three keys, 'filename', 'size', 'fd' @rtype: dict """ if not self.authenticated and not self.skip_auth: self.auth() qparams = config.params['attach'].copy() qparams['id'] = attachid req_params = urlencode(qparams, True) req_url = urljoin(self.base, config.urls['attach']) req_url += '?' + req_params req = Request(req_url, None, config.headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) try: content_type = resp.info()['Content-type'] namefield = content_type.split(';')[1] filename = re.search(r'name=\"(.*)\"', namefield).group(1) content_length = int(resp.info()['Content-length'], 0) return {'filename': filename, 'size': content_length, 'fd': resp} except: return {} def post(self, product, component, title, description, url='', assigned_to='', cc='', keywords='', version='', dependson='', blocked='', priority='', severity=''): """Post a bug @param product: product where the bug should be placed @type product: string @param component: component where the bug should be placed @type component: string @param title: title of the bug. @type title: string @param description: description of the bug @type description: string @keyword url: optional url to submit with bug @type url: string @keyword assigned_to: optional email to assign bug to @type assigned_to: string. @keyword cc: option list of CC'd emails @type: string @keyword keywords: option list of bugzilla keywords @type: string @keyword version: version of the component @type: string @keyword dependson: bugs this one depends on @type: string @keyword blocked: bugs this one blocks @type: string @keyword priority: priority of this bug @type: string @keyword severity: severity of this bug @type: string @rtype: int @return: the bug number, or 0 if submission failed. 
""" if not self.authenticated and not self.skip_auth: self.auth() qparams = config.params['post'].copy() qparams['product'] = product qparams['component'] = component qparams['short_desc'] = title qparams['comment'] = description qparams['assigned_to'] = assigned_to qparams['cc'] = cc qparams['bug_file_loc'] = url qparams['dependson'] = dependson qparams['blocked'] = blocked qparams['keywords'] = keywords #XXX: default version is 'unspecified' if version != '': qparams['version'] = version #XXX: default priority is 'Normal' if priority != '': qparams['priority'] = priority #XXX: default severity is 'normal' if severity != '': qparams['bug_severity'] = severity req_params = urlencode(qparams, True) req_url = urljoin(self.base, config.urls['post']) req = Request(req_url, req_params, config.headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) try: re_bug = re.compile( r'(?:\s+)?<title>.*Bug ([0-9]+) Submitted.*</title>') bug_match = re_bug.search(resp.read()) if bug_match: return int(bug_match.group(1)) except: pass return 0 def attach(self, bugid, title, description, filename, content_type='text/plain', ispatch=False): """Attach a file to a bug. @param bugid: bug id @type bugid: int @param title: short description of attachment @type title: string @param description: long description of the attachment @type description: string @param filename: filename of the attachment @type filename: string @keywords content_type: mime-type of the attachment @type content_type: string @rtype: bool @return: True if successful, False if not successful. """ if not self.authenticated and not self.skip_auth: self.auth() qparams = config.params['attach_post'].copy() qparams['bugid'] = bugid qparams['description'] = title qparams['comment'] = description if ispatch: qparams['ispatch'] = '1' qparams['contenttypeentry'] = 'text/plain' else: qparams['contenttypeentry'] = content_type filedata = [('data', filename, open(filename).read())] content_type, body = encode_multipart_formdata(qparams.items(), filedata) req_headers = config.headers.copy() req_headers['Content-type'] = content_type req_headers['Content-length'] = len(body) req_url = urljoin(self.base, config.urls['attach_post']) req = Request(req_url, body, req_headers) if self.httpuser and self.httppassword: base64string = base64.encodestring( '%s:%s' % (self.httpuser, self.httppassword))[:-1] req.add_header("Authorization", "Basic %s" % base64string) resp = self.opener.open(req) # TODO: return attachment id and success? try: re_attach = re.compile(r'<title>(.+)</title>') # Bugzilla 3/4 re_attach34 = re.compile(r'Attachment \d+ added to Bug \d+') response = resp.read() attach_match = re_attach.search(response) if attach_match: if attach_match.group( 1) == "Changes Submitted" or re_attach34.match( attach_match.group(1)): return True else: return attach_match.group(1) else: return False except: pass return False
class Connection:
    ENCODING = 'gb18030'
    USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) '
                  'Gecko/20100101 Firefox/9.0.1')
    BBS_URL = 'http://bbs.nju.edu.cn/'
    DATE_FORMAT = '%b %d %H:%M'
    LINE_WIDTH = 40
    base_url = 'http://bbs.nju.edu.cn/'

    def __init__(self, session=None):
        self._cj = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cj))
        # Set both headers in one list; a second plain assignment would
        # discard the User-Agent header.
        self._opener.addheaders = [('User-Agent', self.USER_AGENT),
                                   ('Referer', self.BBS_URL)]
        if session:
            self.load_session(session)

    def _do_action(self, action, params=None, data=None):
        args = []
        if params:
            for k, v in params.items():
                if isinstance(v, list):
                    args += ['{0}={1}'.format(k, i) for i in v]
                else:
                    args.append('{0}={1}'.format(k, v))
        url = self.base_url + action + ('?' if args else '') + '&'.join(args)
        logger.debug(url)
        body = []
        if data:
            for k, v in data.items():
                body.append('{0}={1}'.format(
                    quote(k), quote(unicode(v).encode(self.ENCODING))))
        try:
            response = self._opener.open(url, '&'.join(body) if data else None)
        except URLError:
            raise NetworkError()
        # decode() in py2.6 does not support `errors` kwarg.
        html = response.read().decode(self.ENCODING, 'ignore')
        # TODO: BeautifulSoup still needs this?
        html = html.replace(u'<nobr>', u'')  # damn it
        return html

    def load_session(self, session):
        from utils import make_cookie
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)
        self._cj.set_cookie(make_cookie('_U_KEY', session.key))
        self._cj.set_cookie(make_cookie('_U_UID', session.uid))
        self._cj.set_cookie(make_cookie('_U_NUM', session.num))

    def is_logged_in(self):
        html = self._do_action('bbsfoot')
        return html.find('bbsqry?userid=guest') == -1

    def login(self, username, password):
        ''' return Session if successful else None '''
        from random import randint
        session = Session()
        session.vd = str(randint(10000, 100000))
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)
        params = {'type': 2}
        data = {u'id': username, u'pw': password}
        html = self._do_action('bbslogin', params, data)
        try:
            s = re.search(r"setCookie\('(.*)'\)", html).group(1)
        except AttributeError:
            return None
        s = s.split('+')
        session.key = str(int(s[-1]) - 2)
        s = s[0].split('N')
        session.uid = s[-1]
        session.num = str(int(s[0]) + 2)
        self.load_session(session)
        return session

    def logout(self, session=None):
        if session:
            self.load_session(session)
        # Leave the value as unicode; _do_action() encodes it to gb18030.
        data = {'Submit': u'注销登录'}
        self._do_action('bbslogout', None, data)
        self._cj.clear()
        self.base_url = self.BBS_URL

    def compose(self, board, title, body, pid=None, gid=None, signature=0):
        ''' XXX: unicode '''
        params = {'board': board}
        lines = body.split(u'\r\n')
        body = []
        for i in lines:
            body.append(u'\r\n'.join(wrap(i, self.LINE_WIDTH)))
        body = u'\r\n'.join(body)
        data = {'title': title, 'text': body}
        if pid is not None:
            data['reid'] = pid
            data['pid'] = gid
        data['signature'] = signature
        html = self._do_action('bbssnd', params, data)
        return 'Refresh' in html

    def fetch_post(self, board, pid, num):
        params = {'board': board, 'file': pid2str(pid), 'num': num}
        html = self._do_action('bbscon', params)
        soup = BeautifulSoup(html)
        txt = soup.find('textarea').text
        ret = Post(board, pid, num)
        ret.parse_post(txt)
        # TODO: works for 'x' post
        s = soup.findAll('a')[-1]['href']
        gid = parse_qs(urlparse(s).query).get('gid', None)
        if gid is not None:
            ret.gid = gid[0]
        else:
            ret.gid = None
        return ret

    def fetch_topic(self, board, pid, start=None):
        params = {'board': board, 'file': pid2str(pid)}
        if start:
            params['start'] = start
        html = self._do_action('bbstcon', params)
        soup = BeautifulSoup(html)
ret = Topic(board, pid) items = soup.findAll('table', {'class': 'main'}) if not items: raise ContentError() for i in items: c = i.tr.td.a['href'] p = Post(board, parse_pid(c), parse_num(c)) c = i.findAll('tr')[1].td.textarea.text p.parse_post(c) ret.post_list.append(p) for i in soup.body.center.findAll('a', recursive=False, limit=3): if i.text == u'本主题下30篇': ret.next_start = int(parse_href(i['href'], 'start')) return ret def fetch_page(self, board, start=None): params = {'board': board} if start: params['start'] = start html = self._do_action('bbstdoc', params) soup = BeautifulSoup(html) items = soup.findAll('tr')[1:] year = datetime.now().year ret = Page(board) for i in items: cells = i.findAll('td') h = Header() h.board = board try: h.num = int(cells[0].text) - 1 except ValueError: continue h.author = cells[2].text.strip() h.date = cells[3].text.strip() h.date = datetime.strptime(h.date, self.DATE_FORMAT) h.date = h.date.replace(year=year) h.title = cells[4].text.strip()[2:] h.pid = parse_pid(cells[4].a['href']) tmp = cells[5].text.strip() if tmp.find('/') != -1: tmp = tmp.split('/') h.reply_count = int(tmp[0]) h.view_count = int(tmp[1]) else: h.view_count = int(tmp) ret.header_list.append(h) # TODO for i in soup.body.center.findAll('a', recursive=False): if i.text == u'上一页': ret.prev_start = int(parse_href(i['href'], 'start')) - 1 return ret def fetch_top10(self): html = self._do_action('bbstop10') soup = BeautifulSoup(html) items = soup.findAll('tr')[1:] ret = Page(u'全站十大') for i in items: cells = i.findAll('td') h = Header() h.board = cells[1].text.strip() h.title = cells[2].text.strip() h.pid = parse_pid(cells[2].a['href']) h.author = cells[3].text.strip() h.reply_count = int(cells[4].text.strip()) ret.header_list.append(h) return ret def fetch_hot(self): html = self._do_action('bbstopall') soup = BeautifulSoup(html) items = soup.findAll('tr') ret = [] tmp = None for i in items: if i.img: tmp = [] continue cells = i.findAll('td') if not cells[0].text: ret.append(tmp) continue for j in cells: h = Header() links = j.findAll('a') h.title = links[0].text.strip() h.board = links[1].text.strip() h.pid = parse_pid(links[0]['href']) tmp.append(h) return ret def fetch_favorites(self): html = self._do_action('bbsleft') soup = BeautifulSoup(html) div = soup.findAll('div', {'id': 'div0'}) if not div: raise Error() items = div[0] items = items.findAll('a')[:-1] ret = [i.text for i in items] return ret def fetch_board_list(self): from time import sleep ret = BoardManager() for i in range(12): sleep(1) html = self._do_action('bbsboa', {'sec': i}) soup = BeautifulSoup(html) try: text = re.search(ur'\[(\w+?)区\]<hr', html, re.UNICODE).group(1) except AttributeError: raise ContentError(u'请勿过快刷新页面') section = Section(i, text) items = soup.findAll('tr')[1:] for i in items: cells = i.findAll('td') s = cells[5].text[2:] # Some board may have a voting in progress if s.endswith(u'V'): s = s[:-1] board = Board(cells[2].text, s) section.board_list.append(board) ret.add(section) return ret def fetch_face_list(self): html = self._do_action('editor/face.htm', {'ptext': 'text'}) soup = BeautifulSoup(html) items = soup.findAll('img') ret = {} for i in items: ret[i['title']] = i['src'] with open('FaceList.json', 'w') as f: json.dump(ret, f)
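# Hedged usage sketch for Connection (the account and board name are
# placeholders): login() picks a random virtual-desktop id, posts the
# credentials and rebuilds the session cookies from the setCookie()
# fragment it scrapes out of the reply.
conn = Connection()
session = conn.login('someuser', 'somepassword')
if session is not None and conn.is_logged_in():
    page = conn.fetch_page('Python')
    for h in page.header_list:
        print h.title
    conn.logout()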
class SafeBoxClient():
    def __init__(self, server_addr="localhost:8000"):
        self.server_addr = server_addr
        self.client_id = self.ccid = self.pin = None
        self.cookie_jar = CookieJar()
        self.curr_ticket = ""

    # startClient: Initializes the client's remaining attributes,
    # this implies starting a session and eventually client registration.
    def startClient(self, ccid, passwd, pin):
        # checking if client is already registered
        def checkClientReg_cb(success):
            if success == False:
                print "User not registered."
                if pin is None:
                    print "Please provide your Citizen Card for registration"
                    reactor.stop()
                    return
                else:
                    print "Registering user..."
                    return self.handleRegister()

            #pprint(self.cookie_jar.__dict__)
            print "User: " + self.ccid + " logged in."
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)

        # Instantiating ClientIdentity
        def startClientId_cb(key):
            self.client_id = ClientIdentity(self.ccid, passwd, key)
            self.handleStartSession(checkClientReg_cb)

        self.ccid = ccid
        if pin is not None:
            self.pin = pin
        return self.handleGetKey(startClientId_cb)

    # Session, Registry and Authentication related operations
    #
    # handleGetKey: handles getkey operations, this happens as the
    # first step of the startClient operation.
    def handleGetKey(self, method):
        def handleGetKey_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "getkey"))
            return NOT_DONE_YET

        agent = Agent(reactor)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/session/?method=getkey',
            headers,
            None)
        d.addCallback(handleGetKey_cb)
        return NOT_DONE_YET

    # handleStartSession: handles startsession operations
    def handleStartSession(self, method):
        def procResponse_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def startSession_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            body = _FileProducer(
                StringIO(self.client_id.encryptData(self.client_id.password)),
                dataq)
            headers = http_headers.Headers()
            d = agent.request(
                'PUT',
                'http://localhost:8000/session/?method=startsession&ccid=' +
                self.ccid + '&nonceid=' + str(nonceid),
                headers,
                body)
            d.addCallback(procResponse_cb)
            return NOT_DONE_YET

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(startSession_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        if self.pin is not None:
            agent = Agent(reactor)
            body = FileBodyProducer(
                StringIO(self.client_id.pub_key.exportKey('PEM')))
            headers = http_headers.Headers()
            d = agent.request(
                'GET',
                'http://localhost:8000/session/?method=getnonce',
                headers,
                body)
            d.addCallback(getNonce_cb)
            return NOT_DONE_YET

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        body = FileBodyProducer(
            StringIO(self.client_id.encryptData(self.client_id.password)))
        headers = http_headers.Headers()
        d = agent.request(
            'PUT',
            'http://localhost:8000/session/?method=startsession&ccid=' +
            self.ccid + '&nonceid=' + str(-1),
            headers,
            body)
        d.addCallback(procResponse_cb)
        return NOT_DONE_YET

    # handleRegister: Handles the registration process. Also part of the
    # startClient operation.
    def handleRegister(self):
        def checkClientReg_cb(success):
            if success == False:
                print "ERROR: Couldn't register user."
                reactor.stop()
                return

            #pprint(self.cookie_jar.__dict__)
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)
            print "Registration Successful."
            print "User: " + self.ccid + " logged in."

        def procResponse_cb(response, method):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def register_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            dataq.append(self.client_id.encryptData(self.client_id.password))

            # Sending the Certificate and the Sub CA to the server
            if self.pin is None:
                print "ERROR! Check the pin!"
                reactor.stop()
            cert = cc.get_certificate(cc.CERT_LABEL, self.pin)
            #print type(cert.as_pem())
            #print cert.as_pem()
            if cert is None:
                print "ERROR! Check the pin"
                reactor.stop()
            subca = cc.get_certificate(cc.SUBCA_LABEL, self.pin)
            #print type(subca.as_pem())
            #print subca.as_pem()
            if subca is None:
                print "ERROR! Check the pin"
                reactor.stop()

            enc_cert = b64encode(cert.as_pem())
            #print "cert len: ", len(enc_cert)
            dataq.append(enc_cert)
            enc_subca = b64encode(subca.as_pem())
            #print "sub ca len: ", len(enc_subca)
            dataq.append(enc_subca)

            dataq.append(self.client_id.pub_key.exportKey('PEM'))
            ext_key = self.client_id.pub_key.exportKey('PEM')
            if self.pin is None:
                print "ERROR! Check the pin or the CC"
                reactor.stop()
            signed_ext_key = cc.sign(ext_key, cc.KEY_LABEL, self.pin)
            enc_sek = b64encode(signed_ext_key)
            #print "encoded ext key: ", enc_sek
            #print "len encoded: ", len(enc_sek)
            dataq.append(enc_sek)

            body = FileProducer2(dataq)
            headers = http_headers.Headers()
            #print "Password:", self.client_id.encryptData(self.client_id.password)
            #print "LEN:", len(self.client_id.encryptData(self.client_id.password))
            d = agent.request(
                'PUT',
                'http://localhost:8000/pboxes/?method=register' +
                '&nonceid=' + str(nonceid),
                headers,
                body)
            d.addCallback(procResponse_cb, checkClientReg_cb)

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(register_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        agent = Agent(reactor)
        body = FileBodyProducer(
            StringIO(self.client_id.pub_key.exportKey('PEM')))
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/session/?method=getnonce',
            headers,
            body)
        d.addCallback(getNonce_cb)
        return NOT_DONE_YET

    def processCookie(self, uri):
        dci = number.long_to_bytes(number.bytes_to_long(self.curr_ticket) + 1)
        #print "incremented ticket", number.bytes_to_long(dci)
        self.curr_ticket = dci
        sci = self.client_id.signData(str(dci))
        enc = self.client_id.encryptData(sci)
        for cookie in self.cookie_jar:
            cookie.value = enc
            cookie.path = uri
        # re-insert the (single) rewritten cookie
        self.cookie_jar.clear()
        self.cookie_jar.set_cookie(cookie)
        return dci

    # List Operations
    #
    # handleList: handles every list command
    def handleList_cb(self, response):
        defer = Deferred()
        response.deliverBody(DataPrinter(defer, "list"))
        return NOT_DONE_YET

    def handleListPboxes(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/pboxes")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        #print "hashed:", self.client_id.genHashArgs(args, salt)
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/pboxes/?method=list&ccid=' + self.ccid,
            headers,
            body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    def handleListFiles(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/files")
        body =
FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'GET', 'http://localhost:8000/files/?method=list&ccid=' + self.ccid, headers, body) d.addCallback(self.handleList_cb) return NOT_DONE_YET def handleListShares(self): args = ("list", str(self.ccid)) salt = self.processCookie("/shares") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'GET', 'http://localhost:8000/shares/?method=list&ccid=' + self.ccid, headers, body) d.addCallback(self.handleList_cb) return NOT_DONE_YET # Get Operations # # handleGetMData: Handles get pbox metadata operations. def handleGetMData(self, data): #data = (method, tgtccid) pprint(data) def handleGetMData_cb(response): defer = Deferred() defer.addCallback(data[0]) response.deliverBody(DataPrinter(defer, "getmdata")) return NOT_DONE_YET args = ("get_mdata", str(self.ccid), data[1]) salt = self.processCookie("/pboxes") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'GET', 'http://localhost:8000/pboxes/?method=get_mdata&ccid=' + self.ccid + "&tgtccid=" + data[1], headers, body) d.addCallback(handleGetMData_cb) return NOT_DONE_YET # handleGetFileMData: Handles get file metadata operations. def handleGetFileMData(self, data): #data = (method, fileid) def handleGetFileMData_cb(response): defer = Deferred() defer.addCallback(data[0]) response.deliverBody(DataPrinter(defer, "getmdata")) return NOT_DONE_YET args = ("get_mdata", str(self.ccid), data[1]) salt = self.processCookie("/files") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'GET', 'http://localhost:8000/files/?method=get_mdata&ccid=' + self.ccid + "&fileid=" + data[1], headers, body) d.addCallback(handleGetFileMData_cb) return NOT_DONE_YET # handleGetShareMData: Handles get share metadata operations. def handleGetShareMData(self, data): #data = (method, fileid) def handleGetShareMData_cb(response): defer = Deferred() defer.addCallback(data[0]) response.deliverBody(DataPrinter(defer, "getmdata")) return NOT_DONE_YET args = ("get_mdata", str(self.ccid), data[1]) salt = self.processCookie("/shares") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'GET', 'http://localhost:8000/shares/?method=get_mdata&ccid=' + self.ccid + "&fileid=" + data[1], headers, body) d.addCallback(handleGetShareMData_cb) return NOT_DONE_YET # handleGet: handles get file #def handleGet(self, line): def printResult_cb(self, data): pprint(data) #TODO: Format this! 
return NOT_DONE_YET # for info requests def handleGetInfo(self, s): if s[1].lower() == "pboxinfo": return self.handleGetMData((self.printResult_cb, s[2].lower())) elif s[1].lower() == "fileinfo": return self.handleGetFileMData((self.printResult_cb, s[2].lower())) elif s[1].lower() == "shareinfo": return self.handleGetShareMData((self.printResult_cb, s[2].lower())) # Decrypt and write the file def writeFile_cb(self, ignore, s): #we should implement http error code checking fileId = s[2] enc_file = open(fileId, "r") if len(s) == 4: dec_file = open(s[3], "w") else: dec_file = open(fileId + "_decrypted", "w") enc_key = enc_file.read(IV_KEY_SIZE_B64) # print "debugging: iv key writefile" # print enc_key print "Decrypting file..." key = self.client_id.decryptData(enc_key) enc_iv = enc_file.read(IV_KEY_SIZE_B64) #print enc_iv iv = self.client_id.decryptData(enc_iv) print iv self.client_id.decryptFileSym(enc_file, dec_file, key, iv) print "File written." # for get file def handleGetFile(self, s): def handleGetFile_cb(response, f): finished = Deferred() finished.addCallback(self.writeFile_cb, s) cons = FileConsumer(f) response.deliverBody(FileDownload(finished, cons)) print "Downloading file..." return finished fileId = s[2] args = ("getfile", str(self.ccid), str(fileId)) salt = self.processCookie("/files") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'GET', 'http://localhost:8000/files/?method=getfile&ccid=' + self.ccid + '&fileid=' + str(fileId), headers, body) f = open(fileId, "w") d.addCallback(handleGetFile_cb, f) return NOT_DONE_YET # for get shared def handleGetShared(self, s): def handleGetShared_cb(response, f): finished = Deferred() finished.addCallback(self.writeFile_cb, s) cons = FileConsumer(f) response.deliverBody(FileDownload(finished, cons)) print "Downloading file..." return finished fileId = s[2] args = ("getshared", str(self.ccid), str(fileId)) salt = self.processCookie("/shares") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'GET', 'http://localhost:8000/shares/?method=getshared&ccid=' + self.ccid + '&fileid=' + fileId, headers, body) f = open(fileId, "w") d.addCallback(handleGetShared_cb, f) return NOT_DONE_YET # Put Operations # printPutReply_cb: prints put and update responses def printPutReply_cb(self, response): print "Done." defer = Deferred() response.deliverBody(DataPrinter(defer, "getmdata")) return NOT_DONE_YET # handlePutFile: handles file upload def handlePutFile(self, line): print "Encrypting file..." s = line.split() file = open(s[2], 'r') enc_file = open("enc_fileout", 'w') crd = self.client_id.encryptFileSym(file, enc_file) args = ("putfile", str(self.ccid), os.path.basename(s[2])) salt = self.processCookie("/files") dataq = [] dataq.append( self.client_id.genHashArgs(args, salt)) dataq.append( self.client_id.encryptData(crd[0], self.client_id.pub_key)) dataq.append( self.client_id.encryptData(crd[1]) ) agent = CookieAgent(Agent(reactor), self.cookie_jar) #print crd[1] # print "debugging:key, iv putfile" # print dataq[1] # print len(dataq[1]) # print dataq[2] # print len(dataq[2]) print "Uploading file..." 
enc_file = open("enc_fileout", 'r') body = _FileProducer(enc_file ,dataq) headers = http_headers.Headers() d = agent.request( 'PUT', 'http://localhost:8000/files/?method=putfile&ccid=' + self.ccid + "&name=" + os.path.basename(s[2]), headers, body) d.addCallback(self.printPutReply_cb) return NOT_DONE_YET # Update Operations # #handles update commands def handleUpdate(self, s): def encryptFile_cb(data):#TODO: Some error checking here. def updateFile_cb(iv): #data = (key,) print "Updating file..." args = ("updatefile", str(self.ccid), os.path.basename(s[3]), s[2]) salt = self.processCookie("/files") dataq = [] dataq.append( self.client_id.genHashArgs(args, salt)) dataq.append( iv ) # print "debugging:ticket, iv updatefile" # print dataq[0] # print dataq[1] # print len(dataq[1]) agent = CookieAgent(Agent(reactor), self.cookie_jar) print "Uploading file..." enc_file = open("enc_fileout", 'r') body = _FileProducer(enc_file ,dataq) headers = http_headers.Headers() d = agent.request( 'POST', 'http://localhost:8000/files/?method=updatefile&ccid=' + self.ccid + "&name=" + os.path.basename(s[3]) + "&fileid=" + s[2] , headers, body) d.addCallback(self.printPutReply_cb) return NOT_DONE_YET def updateShared_cb(iv): print "Updating file..." args = ("updateshared", str(self.ccid), os.path.basename(s[3]), s[2]) salt = self.processCookie("/shares") dataq = [] dataq.append( self.client_id.genHashArgs(args, salt)) dataq.append( iv ) # print "debugging:ticket, iv updatefile" # print dataq[0] # print dataq[1] # print len(dataq[1]) print "Uploading file..." agent = CookieAgent(Agent(reactor), self.cookie_jar) enc_file = open("enc_fileout", 'r') body = _FileProducer(enc_file ,dataq) headers = http_headers.Headers() d = agent.request( 'POST', 'http://localhost:8000/shares/?method=updateshared&ccid=' + self.ccid + "&name=" + os.path.basename(s[3]) + "&fileid=" + s[2] , headers, body) d.addCallback(self.printPutReply_cb) return NOT_DONE_YET if isinstance(data, basestring): print data return print "Encrypting file..." #print data["data"]["SymKey"] enc_key = data["data"]["SymKey"] key = self.client_id.decryptData(enc_key, self.client_id.priv_key) #print len(key) file = open(s[3], 'r') enc_file = open("enc_fileout", 'w') crd = self.client_id.encryptFileSym(file, enc_file, key=key) new_iv = self.client_id.encryptData(crd[1]) if s[1] == "shared": return updateShared_cb(new_iv) return updateFile_cb(new_iv) hsmd_data = (encryptFile_cb, s[2]) if s[1] == "file": return self.handleGetFileMData(hsmd_data) return self.handleGetShareMData(hsmd_data) def handleUpdateSharePerm(self, s): args = ("updateshareperm", str(self.ccid), s[3], s[2], s[4]) salt = self.processCookie("/shares") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'POST', 'http://localhost:8000/shares/?method=updateshareperm&ccid=' + self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2] + "&writeable=" + s[4] , headers, body) d.addCallback(self.printPutReply_cb) return NOT_DONE_YET #Delete Operaions # # handleDelete: handles delete commands def handleDelete(self, line): def printDeleteReply_cb(data): if not data: print "Done." else: print "Done." 
def deleteFile_cb(): args = ("delete", str(self.ccid), s[2]) salt = self.processCookie("/files") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'DELETE', 'http://localhost:8000/files/?method=delete&ccid=' + self.ccid + "&fileid=" + s[2], headers, body) d.addCallback(printDeleteReply_cb) def deleteShare_cb(): args = ("delete", str(self.ccid), s[2], s[3]) salt = self.processCookie("/shares") body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt))) agent = CookieAgent(Agent(reactor), self.cookie_jar) headers = http_headers.Headers() d = agent.request( 'DELETE', 'http://localhost:8000/shares/?method=delete&ccid=' + self.ccid + "&fileid=" + s[2] + "&rccid=" + s[3], headers, body) d.addCallback(printDeleteReply_cb) s = line.split() if len(s) == 4: return deleteShare_cb() if len(s) == 3: return deleteFile_cb() print "Error: invalid arguments!\n" print "Usage: delete <file|share> <fileid> <None|rccid>" return # Share Operation # def handleShare(self, line): def getFKey_cb(data): enc_key = data["data"]["SymKey"] def getDstKey_cb(data): dstkey = data["data"]["PubKey"] print "pubkey" + dstkey def shareFile_cb(): args = ("delete", str(self.ccid), s[3], s[2]) salt = self.processCookie("/shares") dataq = [] dataq.append(self.client_id.genHashArgs(args, salt)) dataq.append(enc_sym_key) print "Uploading symkey..." agent = CookieAgent(Agent(reactor), self.cookie_jar) body = _FileProducer(StringIO("") ,dataq) headers = http_headers.Headers() d = agent.request( 'PUT', 'http://localhost:8000/shares/?method=sharefile&ccid=' + self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2], headers, body) d.addCallback(self.printPutReply_cb) return d #enc_key = data["data"]["SymKey"] sym_key = self.client_id.decryptData(enc_key, self.client_id.priv_key) dstkey = RSA.importKey(dstkey) enc_sym_key = self.client_id.encryptData(sym_key, dstkey) return shareFile_cb() hfmd_data = (getDstKey_cb, s[3].lower()) return self.handleGetMData(hfmd_data) s = line.split() if len(s) == 4: hmd_data = (getFKey_cb, s[2].lower()) return self.handleGetFileMData(hmd_data) else: if s[1].lower() != "file": print "Error: invalid arguments!\n" print "Usage: share file <fileid> <recipient's ccid>" return
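# Sketch of the ticket arithmetic behind processCookie() above: the cookie
# carries a big-endian byte-string counter that PyCrypto's number helpers
# decode, increment and re-encode. Note long_to_bytes() drops leading zero
# bytes, so the encoded length may change across an increment.
from Crypto.Util import number

ticket = '\x00\x00\x01\xff'
as_long = number.bytes_to_long(ticket)           # 511
next_ticket = number.long_to_bytes(as_long + 1)  # '\x02\x00'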
class Site(object):
    """
    **EarwigBot: Wiki Toolset: Site**

    Represents a site, with support for API queries and returning
    :py:class:`~earwigbot.wiki.page.Page`,
    :py:class:`~earwigbot.wiki.user.User`,
    and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor
    takes a bunch of arguments and you probably won't need to call it
    directly; rather, :py:meth:`wiki.get_site()
    <earwigbot.wiki.sitesdb.SitesDB.get_site>` for returning :py:class:`Site`
    instances, :py:meth:`wiki.add_site()
    <earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our
    database, and :py:meth:`wiki.remove_site()
    <earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from
    our database, should suffice.

    *Attributes:*

    - :py:attr:`name`: the site's name (or "wikiid"), like ``"enwiki"``
    - :py:attr:`project`: the site's project name, like ``"wikipedia"``
    - :py:attr:`lang`: the site's language code, like ``"en"``
    - :py:attr:`domain`: the site's web domain, like ``"en.wikipedia.org"``
    - :py:attr:`url`: the site's URL, like ``"https://en.wikipedia.org"``

    *Public methods:*

    - :py:meth:`api_query`: does an API query with kwargs as params
    - :py:meth:`sql_query`: does an SQL query and yields its results
    - :py:meth:`get_maxlag`: returns the internal database lag
    - :py:meth:`get_replag`: estimates the external database lag
    - :py:meth:`namespace_id_to_name`: returns names associated with an NS id
    - :py:meth:`namespace_name_to_id`: returns the ID associated with a NS
      name
    - :py:meth:`get_page`: returns a Page for the given title
    - :py:meth:`get_category`: returns a Category for the given title
    - :py:meth:`get_user`: returns a User object for the given name
    - :py:meth:`delegate`: controls when the API or SQL is used
    """
    SERVICE_API = 1
    SERVICE_SQL = 2

    def __init__(self, name=None, project=None, lang=None, base_url=None,
                 article_path=None, script_path=None, sql=None,
                 namespaces=None, login=(None, None), cookiejar=None,
                 user_agent=None, use_https=False, assert_edit=None,
                 maxlag=None, wait_between_queries=2, logger=None,
                 search_config=None):
        """Constructor for new Site instances.

        This probably isn't necessary to call yourself unless you're building
        a Site that's not in your config and you don't want to add it -
        normally all you need is wiki.get_site(name), which creates the Site
        for you based on your config file and the sites database. We accept a
        bunch of kwargs, but the only ones you really "need" are *base_url*
        and *script_path*; this is enough to figure out an API url. *login*, a
        tuple of (username, password), is highly recommended. *cookiejar* will
        be used to store cookies, and we'll use a normal CookieJar if none is
        given.

        First, we'll store the given arguments as attributes, then set up our
        URL opener. We'll load any of the attributes that weren't given from
        the API, and then log in if a username/pass was given and we aren't
        already logged in.
        """
        # Attributes referring to site information, filled in by an API query
        # if they are missing (and an API url can be determined):
        self._name = name
        self._project = project
        self._lang = lang
        self._base_url = base_url
        self._article_path = article_path
        self._script_path = script_path
        self._namespaces = namespaces

        # Attributes used for API queries:
        self._use_https = use_https
        self._assert_edit = assert_edit
        self._maxlag = maxlag
        self._wait_between_queries = wait_between_queries
        self._max_retries = 6
        self._last_query_time = 0
        self._api_lock = Lock()
        self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

        # Attributes used for SQL queries:
        if sql:
            self._sql_data = sql
        else:
            self._sql_data = {}
        self._sql_conn = None
        self._sql_lock = Lock()
        self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

        # Attribute used in copyright violation checks (see CopyrightMixIn):
        if search_config:
            self._search_config = search_config
        else:
            self._search_config = {}

        # Set up cookiejar and URL opener for making API queries:
        if cookiejar is not None:
            self._cookiejar = cookiejar
        else:
            self._cookiejar = CookieJar()
        if not user_agent:
            user_agent = constants.USER_AGENT  # Set default UA
        self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
        self._opener.addheaders = [("User-Agent", user_agent),
                                   ("Accept-Encoding", "gzip")]

        # Set up our internal logger:
        if logger:
            self._logger = logger
        else:  # Just set up a null logger to eat up our messages:
            self._logger = getLogger("earwigbot.wiki")
            self._logger.addHandler(NullHandler())

        # Get all of the above attributes that were not specified as
        # arguments:
        self._load_attributes()

        # If we have a name/pass and the API says we're not logged in, log in:
        self._login_info = name, password = login
        if name and password:
            logged_in_as = self._get_username_from_cookies()
            if not logged_in_as or name.replace("_", " ") != logged_in_as:
                self._login(login)

    def __repr__(self):
        """Return the canonical string representation of the Site."""
        res = ", ".join((
            "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
            "base_url={_base_url!r}", "article_path={_article_path!r}",
            "script_path={_script_path!r}", "use_https={_use_https!r}",
            "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
            "sql={_sql_data!r}", "login={0}", "user_agent={2!r}",
            "cookiejar={1})"))
        name, password = self._login_info
        login = "({0}, {1})".format(repr(name), "hidden" if password else None)
        cookies = self._cookiejar.__class__.__name__
        if hasattr(self._cookiejar, "filename"):
            cookies += "({0!r})".format(getattr(self._cookiejar, "filename"))
        else:
            cookies += "()"
        agent = self._opener.addheaders[0][1]
        return res.format(login, cookies, agent, **self.__dict__)

    def __str__(self):
        """Return a nice string representation of the Site."""
        res = "<Site {0} ({1}:{2}) at {3}>"
        return res.format(self.name, self.project, self.lang, self.domain)

    def _unicodeify(self, value, encoding="utf8"):
        """Return input as unicode if it's not unicode to begin with."""
        if isinstance(value, unicode):
            return value
        return unicode(value, encoding)

    def _urlencode_utf8(self, params):
        """Implement urllib.urlencode() with support for unicode input."""
        enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
        args = []
        for key, val in params.iteritems():
            key = quote_plus(enc(key))
            val = quote_plus(enc(val))
            args.append(key + "=" + val)
        return "&".join(args)

    def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False):
        """Do an API query with *params* as a dict of parameters.

        See the documentation for :py:meth:`api_query` for full implementation
        details.
        """
        since_last_query = time() - self._last_query_time  # Throttling support
        if since_last_query < self._wait_between_queries:
            wait_time = self._wait_between_queries - since_last_query
            log = "Throttled: waiting {0} seconds".format(round(wait_time, 2))
            self._logger.debug(log)
            sleep(wait_time)
        self._last_query_time = time()

        url, data = self._build_api_query(params, ignore_maxlag)
        if "lgpassword" in params:
            self._logger.debug("{0} -> <hidden>".format(url))
        else:
            self._logger.debug("{0} -> {1}".format(url, data))

        try:
            response = self._opener.open(url, data)
        except URLError as error:
            if hasattr(error, "reason"):
                e = "API query failed: {0}.".format(error.reason)
            elif hasattr(error, "code"):
                e = "API query failed: got an error code of {0}."
                e = e.format(error.code)
            else:
                e = "API query failed."
            raise exceptions.APIError(e)

        result = response.read()
        if response.headers.get("Content-Encoding") == "gzip":
            stream = StringIO(result)
            gzipper = GzipFile(fileobj=stream)
            result = gzipper.read()

        return self._handle_api_query_result(result, params, tries, wait)

    def _build_api_query(self, params, ignore_maxlag):
        """Given API query params, return the URL to query and POST data."""
        if not self._base_url or self._script_path is None:
            e = "Tried to do an API query, but no API URL is known."
            raise exceptions.APIError(e)

        url = ''.join((self.url, self._script_path, "/api.php"))
        params["format"] = "json"  # This is the only format we understand
        if self._assert_edit:  # If requested, ensure that we're logged in
            params["assert"] = self._assert_edit
        if self._maxlag and not ignore_maxlag:
            # If requested, don't overload the servers:
            params["maxlag"] = self._maxlag

        data = self._urlencode_utf8(params)
        return url, data

    def _handle_api_query_result(self, result, params, tries, wait):
        """Given the result of an API query, attempt to return useful data."""
        try:
            res = loads(result)  # Try to parse as a JSON object
        except ValueError:
            e = "API query failed: JSON could not be decoded."
            raise exceptions.APIError(e)

        try:
            code = res["error"]["code"]
            info = res["error"]["info"]
        except (TypeError, KeyError):  # Having these keys indicates a problem
            return res  # All is well; return the decoded JSON

        if code == "maxlag":  # We've been throttled by the server
            if tries >= self._max_retries:
                e = "Maximum number of retries reached ({0})."
                raise exceptions.APIError(e.format(self._max_retries))
            tries += 1
            msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
            self._logger.info(msg.format(info, wait, tries, self._max_retries))
            sleep(wait)
            return self._api_query(params, tries=tries, wait=wait * 2)
        else:  # Some unknown error occurred
            e = 'API query failed: got error "{0}"; server says: "{1}".'
            error = exceptions.APIError(e.format(code, info))
            error.code, error.info = code, info
            raise error

    def _load_attributes(self, force=False):
        """Load data about our Site from the API.

        This function is called by __init__() when one of the site attributes
        was not given as a keyword argument. We'll do an API query to get the
        missing data, but only if there actually *is* missing data.

        Additionally, you can call this with *force* set to True to forcibly
        reload all attributes.
        """
        # All attributes to be loaded, except _namespaces, which is a special
        # case because it requires additional params in the API query:
        attrs = [self._name, self._project, self._lang, self._base_url,
                 self._article_path, self._script_path]

        params = {"action": "query", "meta": "siteinfo", "siprop": "general"}

        if not self._namespaces or force:
            params["siprop"] += "|namespaces|namespacealiases"
            result = self.api_query(**params)
            self._load_namespaces(result)
        elif all(attrs):  # Everything is already specified and we're not told
            return        # to force a reload, so do nothing
        else:  # We're only loading attributes other than _namespaces
            result = self.api_query(**params)

        res = result["query"]["general"]
        self._name = res["wikiid"]
        self._project = res["sitename"].lower()
        self._lang = res["lang"]
        self._base_url = res["server"]
        self._article_path = res["articlepath"]
        self._script_path = res["scriptpath"]

    def _load_namespaces(self, result):
        """Fill self._namespaces with a dict of namespace IDs and names.

        Called by _load_attributes() with API data as *result* when
        self._namespaces was not given as a kwarg to __init__().
        """
        self._namespaces = {}
        for namespace in result["query"]["namespaces"].values():
            ns_id = namespace["id"]
            name = namespace["*"]
            try:
                canonical = namespace["canonical"]
            except KeyError:
                self._namespaces[ns_id] = [name]
            else:
                if name != canonical:
                    self._namespaces[ns_id] = [name, canonical]
                else:
                    self._namespaces[ns_id] = [name]

        for namespace in result["query"]["namespacealiases"]:
            ns_id = namespace["id"]
            alias = namespace["*"]
            self._namespaces[ns_id].append(alias)

    def _get_cookie(self, name, domain):
        """Return the named cookie unless it is expired or doesn't exist."""
        for cookie in self._cookiejar:
            if cookie.name == name and cookie.domain == domain:
                if cookie.is_expired():
                    break
                return cookie

    def _get_username_from_cookies(self):
        """Try to return our username based solely on cookies.

        First, we'll look for a cookie named self._name + "Token", like
        "enwikiToken". If it exists and isn't expired, we'll assume it's valid
        and try to return the value of the cookie self._name + "UserName"
        (like "enwikiUserName"). This should work fine on wikis without
        single-user login.

        If `enwikiToken` doesn't exist, we'll try to find a cookie named
        `centralauth_Token`. If this exists and is not expired, we'll try to
        return the value of `centralauth_User`.

        If we didn't get any matches, we'll return None. Our goal here isn't
        to return the most likely username, or what we *want* our username to
        be (for that, we'd do self._login_info[0]), but rather to get our
        current username without an unnecessary
        ?action=query&meta=userinfo API query.
        """
        name = ''.join((self._name, "Token"))
        cookie = self._get_cookie(name, self.domain)

        if cookie:
            name = ''.join((self._name, "UserName"))
            user_name = self._get_cookie(name, self.domain)
            if user_name:
                return unquote_plus(user_name.value)

        for cookie in self._cookiejar:
            if cookie.name != "centralauth_Token" or cookie.is_expired():
                continue
            base = cookie.domain
            if base.startswith(".") and not cookie.domain_initial_dot:
                base = base[1:]
            if self.domain.endswith(base):
                user_name = self._get_cookie("centralauth_User", cookie.domain)
                if user_name:
                    return unquote_plus(user_name.value)

    def _get_username_from_api(self):
        """Do a simple API query to get our username and return it.

        This is a reliable way to make sure we are actually logged in, because
        it doesn't deal with annoying cookie logic, but it results in an API
        query that is unnecessary in some cases.

        Called by _get_username() (in turn called by get_user() with no
        username argument) when cookie lookup fails, probably indicating that
        we are logged out.
        """
        result = self.api_query(action="query", meta="userinfo")
        return result["query"]["userinfo"]["name"]

    def _get_username(self):
        """Return the name of the current user, whether logged in or not.

        First, we'll try to deduce it solely from cookies, to avoid an
        unnecessary API query. For the cookie-detection method, see
        _get_username_from_cookies()'s docs.

        If our username isn't in cookies, then we're probably not logged in,
        or something fishy is going on (like forced logout). In this case, do
        a single API query for our username (or IP address) and return that.
        """
        name = self._get_username_from_cookies()
        if name:
            return name
        return self._get_username_from_api()

    def _save_cookiejar(self):
        """Try to save our cookiejar after doing a (normal) login or logout.

        Calls the standard .save() method with no filename. Don't fret if our
        cookiejar doesn't support saving (CookieJar raises AttributeError,
        FileCookieJar raises NotImplementedError) or no default filename was
        given (LWPCookieJar and MozillaCookieJar raise ValueError).
        """
        if hasattr(self._cookiejar, "save"):
            try:
                getattr(self._cookiejar, "save")()
            except (NotImplementedError, ValueError):
                pass

    def _login(self, login, token=None, attempt=0):
        """Safely login through the API.

        Normally, this is called by __init__() if a username and password have
        been provided and no valid login cookies were found. The only other
        time it needs to be called is when those cookies expire, which is done
        automatically by api_query() if a query fails.

        Recent versions of MediaWiki's API have fixed a CSRF vulnerability,
        requiring login to be done in two separate requests. If the response
        from our initial request is "NeedToken", we'll do another one with the
        token. If login is successful, we'll try to save our cookiejar.

        Raises LoginError on login errors (duh), like bad passwords and
        nonexistent usernames.

        *login* is a (username, password) tuple. *token* is the token returned
        from our first request, and *attempt* is to prevent getting stuck in a
        loop if MediaWiki isn't acting right.
        """
        name, password = login
        if token:
            result = self.api_query(action="login", lgname=name,
                                    lgpassword=password, lgtoken=token)
        else:
            result = self.api_query(action="login", lgname=name,
                                    lgpassword=password)

        res = result["login"]["result"]
        if res == "Success":
            self._save_cookiejar()
        elif res == "NeedToken" and attempt == 0:
            token = result["login"]["token"]
            return self._login(login, token, attempt=1)
        else:
            if res == "Illegal":
                e = "The provided username is illegal."
            elif res == "NotExists":
                e = "The provided username does not exist."
            elif res == "EmptyPass":
                e = "No password was given."
            elif res == "WrongPass" or res == "WrongPluginPass":
                e = "The given password is incorrect."
            else:
                e = "Couldn't login; server says '{0}'.".format(res)
            raise exceptions.LoginError(e)

    def _logout(self):
        """Safely logout through the API.

        We'll do a simple API request (api.php?action=logout), clear our
        cookiejar (which probably contains now-invalidated cookies) and try to
        save it, if it supports that sort of thing.
        """
        self.api_query(action="logout")
        self._cookiejar.clear()
        self._save_cookiejar()

    def _sql_connect(self, **kwargs):
        """Attempt to establish a connection with this site's SQL database.

        oursql.connect() will be called with self._sql_data as its kwargs.
        Any kwargs given to this function will be passed to connect() and will
        have precedence over the config file.

        Will raise SQLError() if the module "oursql" is not available. oursql
        may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
        establish a connection.
        """
        if not oursql:
            e = "Module 'oursql' is required for SQL queries."
            raise exceptions.SQLError(e)

        args = self._sql_data
        for key, value in kwargs.iteritems():
            args[key] = value

        if ("read_default_file" not in args and "user" not in args and
                "passwd" not in args):
            args["read_default_file"] = expanduser("~/.my.cnf")

        if "autoping" not in args:
            args["autoping"] = True

        if "autoreconnect" not in args:
            args["autoreconnect"] = True

        self._sql_conn = oursql.connect(**args)

    def _get_service_order(self):
        """Return a preferred order for using services (e.g. the API and SQL).

        A list is returned, starting with the most preferred service first and
        ending with the least preferred one. Currently, there are only two
        services. SERVICE_API will always be included since the API is
        expected to be always usable. In normal circumstances,
        self.SERVICE_SQL will be first (with the API second), since using SQL
        directly is easier on the servers than making web queries with the
        API. self.SERVICE_SQL will be second if replag is greater than five
        minutes (a cached value updated every two minutes at most), *unless*
        API lag is also very high. self.SERVICE_SQL will not be included in
        the list if we cannot form a proper SQL connection.
        """
        now = time()
        if now - self._sql_info_cache["lastcheck"] > 120:
            self._sql_info_cache["lastcheck"] = now
            try:
                self._sql_info_cache["replag"] = sqllag = self.get_replag()
            except (exceptions.SQLError, oursql.Error):
                self._sql_info_cache["usable"] = False
                return [self.SERVICE_API]
            self._sql_info_cache["usable"] = True
        else:
            if not self._sql_info_cache["usable"]:
                return [self.SERVICE_API]
            sqllag = self._sql_info_cache["replag"]

        if sqllag > 300:
            if not self._maxlag:
                return [self.SERVICE_API, self.SERVICE_SQL]
            if now - self._api_info_cache["lastcheck"] > 300:
                self._api_info_cache["lastcheck"] = now
                try:
                    self._api_info_cache["maxlag"] = apilag = self.get_maxlag()
                except exceptions.APIError:
                    self._api_info_cache["maxlag"] = apilag = 0
            else:
                apilag = self._api_info_cache["maxlag"]
            if apilag > self._maxlag:
                return [self.SERVICE_SQL, self.SERVICE_API]
            return [self.SERVICE_API, self.SERVICE_SQL]

        return [self.SERVICE_SQL, self.SERVICE_API]

    @property
    def name(self):
        """The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
        return self._name

    @property
    def project(self):
        """The Site's project name in lowercase, like ``"wikipedia"``."""
        return self._project

    @property
    def lang(self):
        """The Site's language code, like ``"en"`` or ``"es"``."""
        return self._lang

    @property
    def domain(self):
        """The Site's web domain, like ``"en.wikipedia.org"``."""
        return urlparse(self._base_url).netloc

    @property
    def url(self):
        """The Site's full base URL, like ``"https://en.wikipedia.org"``."""
        url = self._base_url
        if url.startswith("//"):  # Protocol-relative URLs from 1.18
            if self._use_https:
                url = "https:" + url
            else:
                url = "http:" + url
        return url

    def api_query(self, **kwargs):
        """Do an API query with `kwargs` as the parameters.

        This will first attempt to construct an API url from
        :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
        both of these, or else we'll raise
        :py:exc:`~earwigbot.exceptions.APIError`. If
        :py:attr:`self._base_url` is protocol-relative (introduced in
        MediaWiki 1.18), we'll choose HTTPS only if
        :py:attr:`self._use_https` is ``True``, otherwise HTTP.

        We'll encode the given params, adding ``format=json`` along the way,
        as well as ``&assert=`` and ``&maxlag=`` based on
        :py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively.
        Additionally, we'll sleep a bit if the last query was made fewer than
        :py:attr:`self._wait_between_queries` seconds ago. The request is made
        through :py:attr:`self._opener`, which has cookie support
        (:py:attr:`self._cookiejar`), a ``User-Agent``
        (:py:const:`earwigbot.wiki.constants.USER_AGENT`), and
        ``Accept-Encoding`` set to ``"gzip"``.

        Assuming everything went well, we'll gunzip the data (if compressed),
        load it as a JSON object, and return it.

        If our request failed for some reason, we'll raise
        :py:exc:`~earwigbot.exceptions.APIError` with details. If that reason
        was due to maxlag, we'll sleep for a bit and then repeat the query
        until we exceed :py:attr:`self._max_retries`.

        There is helpful MediaWiki API documentation at `MediaWiki.org
        <http://www.mediawiki.org/wiki/API>`_.
        """
        with self._api_lock:
            return self._api_query(kwargs)

    def sql_query(self, query, params=(), plain_query=False,
                  dict_cursor=False, cursor_class=None, show_table=False):
        """Do an SQL query and yield its results.

        If *plain_query* is ``True``, we will force an unparameterized query.
        Specifying both *params* and *plain_query* will cause an error. If
        *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor`
        as our cursor, otherwise the default :py:class:`oursql.Cursor`. If
        *cursor_class* is given, it will override this option. If *show_table*
        is True, the name of the table will be prepended to the name of the
        column. This will mainly affect an :py:class:`~oursql.DictCursor`.

        Example usage::

            >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
            >>> params = ("The Earwig",)
            >>> result1 = site.sql_query(query, params)
            >>> result2 = site.sql_query(query, params, dict_cursor=True)
            >>> for row in result1: print row
            (7418060L, '20080703215134')
            >>> for row in result2: print row
            {'user_id': 7418060L, 'user_registration': '20080703215134'}

        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
        :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
        query.

        See :py:meth:`_sql_connect` for information on how a connection is
        acquired. Also relevant is `oursql's documentation
        <http://packages.python.org/oursql>`_ for details on that package.
        """
        if not cursor_class:
            if dict_cursor:
                cursor_class = oursql.DictCursor
            else:
                cursor_class = oursql.Cursor
        klass = cursor_class

        with self._sql_lock:
            if not self._sql_conn:
                self._sql_connect()
            with self._sql_conn.cursor(klass, show_table=show_table) as cur:
                cur.execute(query, params, plain_query)
                for result in cur:
                    yield result

    def get_maxlag(self, showall=False):
        """Return the internal database replication lag in seconds.

        In a typical setup, this function returns the replication lag *within*
        the WMF's cluster, *not* external replication lag affecting the
        Toolserver (see :py:meth:`get_replag` for that). This is useful when
        combined with the ``maxlag`` API query param (added by config), in
        which queries will be halted and retried if the lag is too high,
        usually above five seconds.

        With *showall*, will return a list of the lag for all servers in the
        cluster, not just the one with the highest lag.
        """
        params = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"}
        if showall:
            params["sishowalldb"] = 1
        with self._api_lock:
            result = self._api_query(params, ignore_maxlag=True)
        if showall:
            return [server["lag"] for server in result["query"]["dbrepllag"]]
        return result["query"]["dbrepllag"][0]["lag"]

    def get_replag(self):
        """Return the estimated external database replication lag in seconds.

        Requires SQL access. This function only makes sense on a replicated
        database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
        large number of edits (ideally, at least one per second), or the
        result may be larger than expected, since it works by subtracting the
        current time from the timestamp of the latest recent changes event.

        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
        :py:exc:`oursql.InterfaceError`, ...) if there were problems.
        """
        query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
                   recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
        result = list(self.sql_query(query))
        return result[0][0]

    def namespace_id_to_name(self, ns_id, all=False):
        """Given a namespace ID, returns associated namespace names.

        If *all* is ``False`` (default), we'll return the first name in the
        list, which is usually the localized version. Otherwise, we'll return
        the entire list, which includes the canonical name. For example, this
        returns ``u"Wikipedia"`` if *ns_id* = ``4`` and *all* is ``False`` on
        ``enwiki``; returns ``[u"Wikipedia", u"Project", u"WP"]`` if *ns_id* =
        ``4`` and *all* is ``True``.

        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
        ID is not found.
        """
        try:
            if all:
                return self._namespaces[ns_id]
            else:
                return self._namespaces[ns_id][0]
        except KeyError:
            e = "There is no namespace with id {0}.".format(ns_id)
            raise exceptions.NamespaceNotFoundError(e)

    def namespace_name_to_id(self, name):
        """Given a namespace name, returns the associated ID.

        Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored,
        because namespaces are assumed to be case-insensitive.

        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
        name is not found.
        """
        lname = name.lower()
        for ns_id, names in self._namespaces.items():
            lnames = [n.lower() for n in names]  # Be case-insensitive
            if lname in lnames:
                return ns_id

        e = "There is no namespace with name '{0}'.".format(name)
        raise exceptions.NamespaceNotFoundError(e)

    def get_page(self, title, follow_redirects=False, pageid=None):
        """Return a :py:class:`Page` object for the given title.

        *follow_redirects* is passed directly to
        :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
        return a :py:class:`~earwigbot.wiki.category.Category` object instead
        if the given title is in the category namespace. As
        :py:class:`~earwigbot.wiki.category.Category` is a subclass of
        :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

        Note that this doesn't do any direct checks for existence or
        redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
        provide that.
        """
        title = self._unicodeify(title)
        prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
        prefix = title.split(":", 1)[0]
        if prefix != title:  # Avoid a page that is simply "Category"
            if prefix in prefixes:
                return Category(self, title, follow_redirects, pageid,
                                self._logger)
        return Page(self, title, follow_redirects, pageid, self._logger)

    def get_category(self, catname, follow_redirects=False, pageid=None):
        """Return a :py:class:`Category` object for the given category name.

        *catname* should be given *without* a namespace prefix. This method is
        really just shorthand for :py:meth:`get_page("Category:" + catname)
        <get_page>`.
        """
        catname = self._unicodeify(catname)
        prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
        pagename = u':'.join((prefix, catname))
        return Category(self, pagename, follow_redirects, pageid, self._logger)

    def get_user(self, username=None):
        """Return a :py:class:`User` object for the given username.

        If *username* is left as ``None``, then a
        :py:class:`~earwigbot.wiki.user.User` object representing the
        currently logged-in (or anonymous!) user is returned.
        """
        if username:
            username = self._unicodeify(username)
        else:
            username = self._get_username()
        return User(self, username, self._logger)

    def delegate(self, services, args=None, kwargs=None):
        """Delegate a task to either the API or SQL depending on conditions.

        *services* should be a dictionary in which the key is the service name
        (:py:attr:`self.SERVICE_API <SERVICE_API>` or
        :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`), and the value is the
        function to call for this service. All functions will be passed the
        same arguments: the tuple *args* and the dict *kwargs*, which are both
        empty by default. The service order is determined by
        :py:meth:`_get_service_order`.

        Not every service needs an entry in the dictionary. Will raise
        :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate
        service cannot be found.
        """
        if not args:
            args = ()
        if not kwargs:
            kwargs = {}

        order = self._get_service_order()
        for srv in order:
            if srv in services:
                try:
                    return services[srv](*args, **kwargs)
                except exceptions.ServiceError:
                    continue
        raise exceptions.NoServiceError(services)
class HLSFetcher(object):
    def __init__(self, url, **options):
        self.program = options.get('program', 1)
        self.hls_headers = options.get('headers', {})
        self.path = options.get('path', None)
        self.bitrate = options.get('bitrate', 200000)
        self.nbuffer = options.get('buffer', 3)
        self.n_segments_keep = options.get('keep', self.nbuffer + 1)
        url = urllib.unquote(url)
        self.puser = options.get('puser')
        self.ppass = options.get('ppass')
        self.purl = options.get('purl')
        us = url.split('|')
        if len(us) > 1:
            self.url = us[0]
            for hd in us[1:]:
                self.hls_headers.update(dict(urlparse.parse_qsl(hd.strip())))
        else:
            self.url = url
        self.agent = self.hls_headers.pop('User-Agent', 'Enigma2 Mediaplayer')
        if not self.path:
            self.path = tempfile.mkdtemp()

        self._program_playlist = None
        self._file_playlist = None
        self._cookies = CookieJar()
        self._cached_files = {}  # sequence n -> path
        self._run = True
        self._poolHelper = TwHTTP11PoolHelper(retryAutomatically=True)

        self._files = None  # the iter of the playlist files download
        self._next_download = None  # the delayed download defer, if any
        self._file_playlisted = None  # the defer to wait until new files are added to playlist
        self._new_filed = None
        self._seg_task = None

    def _get_page(self, url):
        url = url.encode("utf-8")
        if 'HLS_RESET_COOKIES' in os.environ.keys():
            self._cookies.clear()
        timeout = 10
        return twAgentGetPage(url, agent=self.agent, cookieJar=self._cookies,
                              headers=self.hls_headers, timeout=timeout,
                              pool=self._poolHelper._pool,
                              proxy_url=self.purl,
                              p_user=self.puser, p_pass=self.ppass)

    def _download_page(self, url, path, file):
        def _decrypt(data):
            def num_to_iv(n):
                iv = struct.pack(">8xq", n)
                return b"\x00" * (16 - len(iv)) + iv

            if not self._file_playlist._iv:
                iv = num_to_iv(file['sequence'])
                aes = AES.new(self._file_playlist._key, AES.MODE_CBC, iv)
            else:
                aes = AES.new(self._file_playlist._key, AES.MODE_CBC,
                              self._file_playlist._iv)
            return aes.decrypt(data)

        d = self._get_page(url)
        if self._file_playlist._key:
            d.addCallback(_decrypt)
        return d

    def _download_segment(self, f):
        url = make_url(self._file_playlist.url, f['file'])
        name = 'seg_' + next(tempfile._get_candidate_names())
        path = os.path.join(self.path, name)
        d = self._download_page(url, path, f)
        if self.n_segments_keep != 0:
            file = open(path, 'wb')
            d.addCallback(lambda x: file.write(x))
            d.addBoth(lambda _: file.close())
            d.addCallback(lambda _: path)
            d.addErrback(self._got_file_failed)
            d.addCallback(self._got_file, url, f)
        else:
            d.addCallback(lambda _: (None, path, f))
        return d

    def delete_cache(self, f):
        bgFileEraser = eBackgroundFileEraser.getInstance()
        keys = self._cached_files.keys()
        for i in ifilter(f, keys):
            filename = self._cached_files[i]
            bgFileEraser.erase(str(filename))
            del self._cached_files[i]

    def delete_all_cache(self):
        bgFileEraser = eBackgroundFileEraser.getInstance()
        for path in self._cached_files.itervalues():
            bgFileEraser.erase(str(path))
        self._cached_files.clear()

    def _got_file_failed(self, e):
        if self._new_filed:
            self._new_filed.errback(e)
            self._new_filed = None

    def _got_file(self, path, url, f):
        self._cached_files[f['sequence']] = path
        if self.n_segments_keep != -1:
            self.delete_cache(
                lambda x: x <= f['sequence'] - self.n_segments_keep)
        if self._new_filed:
            self._new_filed.callback((path, url, f))
            self._new_filed = None
        return (path, url, f)

    def _get_next_file(self):
        next = self._files.next()
        if next:
            return self._download_segment(next)
        elif not self._file_playlist.endlist():
            self._seg_task.stop()
            self._file_playlisted = defer.Deferred()
            self._file_playlisted.addCallback(lambda x: self._get_next_file())
            self._file_playlisted.addCallback(self._next_file_delay)
            self._file_playlisted.addCallback(self._seg_task.start)
            return self._file_playlisted

    def _handle_end(self, failure):
        failure.trap(StopIteration)
        print "End of media"

    def _next_file_delay(self, f):
        if f is None:
            return 0
        delay = f[2]["duration"]
        if self.nbuffer > 0:
            for i in range(0, self.nbuffer):
                if self._cached_files.has_key(f[2]['sequence'] - i):
                    return delay
            delay = 0
        elif self._file_playlist.endlist():
            delay = 1
        return delay

    def _get_files_loop(self, res=None):
        if not self._seg_task:
            self._seg_task = task.LoopingCall(self._get_next_file)
        d = self._get_next_file()
        if d is not None:
            self._seg_task.stop()
            d.addCallback(self._next_file_delay)
            d.addCallback(self._seg_task.start)
            d.addErrback(self._handle_end)

    def _playlist_updated(self, pl):
        if pl and pl.has_programs():
            # if we got a program playlist, save it and start a program
            self._program_playlist = pl
            (program_url, _) = pl.get_program_playlist(self.program,
                                                       self.bitrate)
            return self._reload_playlist(M3U8(program_url, self._cookies,
                                              self.hls_headers))
        elif pl and pl.has_files():
            # we got sequence playlist, start reloading it regularly, and get files
            self._file_playlist = pl
            if not self._files:
                self._files = pl.iter_files()
            if not pl.endlist():
                reactor.callLater(pl.reload_delay(), self._reload_playlist, pl)
            if self._file_playlisted:
                self._file_playlisted.callback(pl)
                self._file_playlisted = None
        else:
            raise Exception('Playlist has no valid content.')
        return pl

    def _got_playlist_content(self, content, pl):
        if not pl.update(content) and self._run:
            # if the playlist cannot be loaded, start a reload timer
            d = deferLater(reactor, pl.reload_delay(), self._fetch_playlist, pl)
            d.addCallback(self._got_playlist_content, pl)
            return d
        return pl

    def _fetch_playlist(self, pl):
        d = self._get_page(pl.url)
        return d

    def _reload_playlist(self, pl):
        if self._run:
            d = self._fetch_playlist(pl)
            d.addCallback(self._got_playlist_content, pl)
            d.addCallback(self._playlist_updated)
            return d
        else:
            return None

    def get_file(self, sequence):
        d = defer.Deferred()
        keys = self._cached_files.keys()
        try:
            endlist = sequence == self._file_playlist._end_sequence
            sequence = ifilter(lambda x: x >= sequence, keys).next()
            filename = self._cached_files[sequence]
            d.callback((filename, endlist))
        except:
            d.addCallback(lambda x: self.get_file(sequence))
            self._new_filed = d
            keys.sort()
        return d

    def _start_get_files(self, x):
        self._new_filed = defer.Deferred()
        self._get_files_loop()
        return self._new_filed

    def start(self):
        if self._run:
            self._files = None
            d = self._reload_playlist(M3U8(self.url, self._cookies,
                                           self.hls_headers))
            d.addCallback(self._start_get_files)
            return d

    def stop(self):
        self._run = False
        self._poolHelper.close()
        if self._seg_task is not None:
            self._seg_task.stop()
        if self._new_filed is not None:
            self._new_filed.cancel()
        reactor.callLater(1, self.delete_all_cache)
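# Hedged usage sketch for HLSFetcher: start() kicks off playlist fetching,
# then segments are downloaded, decrypted if keyed, and cached under `path`
# until stop() is called. Note the class leans on Enigma2 helpers
# (eBackgroundFileEraser, twAgentGetPage), so this only runs inside that
# environment; the stream URL (with the '|'-separated header syntax parsed
# by __init__) is a placeholder.
fetcher = HLSFetcher('http://example.com/stream.m3u8|User-Agent=VLC',
                     path='/tmp/hls', bitrate=200000, buffer=3)
d = fetcher.start()
d.addErrback(lambda failure: fetcher.stop())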
class Bugz:
    """ Converts sane method calls to Bugzilla HTTP requests.

    @ivar base: base url of bugzilla.
    @ivar user: username for authenticated operations.
    @ivar password: password for authenticated operations
    @ivar cookiejar: for authenticated sessions so we only auth once.
    @ivar forget: forget user/password after session.
    @ivar authenticated: is this session authenticated already
    """

    def __init__(self, base, user=None, password=None, forget=False,
                 skip_auth=False, httpuser=None, httppassword=None):
        """
        {user} and {password} will be prompted if an action needs them
        and they are not supplied.

        if {forget} is set, the login cookie will be destroyed on quit.

        @param base: base url of the bugzilla
        @type base: string
        @keyword user: username for authenticated actions.
        @type user: string
        @keyword password: password for authenticated actions.
        @type password: string
        @keyword forget: forget login session after termination.
        @type forget: bool
        @keyword skip_auth: do not authenticate
        @type skip_auth: bool
        """
        self.base = base
        scheme, self.host, self.path, query, frag = urlsplit(self.base)
        self.authenticated = False
        self.forget = forget

        if not self.forget:
            try:
                cookie_file = os.path.join(os.environ["HOME"], COOKIE_FILE)
                self.cookiejar = LWPCookieJar(cookie_file)
                if forget:
                    try:
                        self.cookiejar.load()
                        self.cookiejar.clear()
                        self.cookiejar.save()
                        os.chmod(self.cookiejar.filename, 0600)
                    except IOError:
                        pass
            except KeyError:
                self.warn("Unable to save session cookies in %s" % COOKIE_FILE)
                self.cookiejar = CookieJar()
        else:
            self.cookiejar = CookieJar()

        self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
        self.user = user
        self.password = password
        self.httpuser = httpuser
        self.httppassword = httppassword
        self.skip_auth = skip_auth

    def log(self, status_msg):
        """Default logging handler. Expected to be overridden by
        the UI implementing subclass.

        @param status_msg: status message to print
        @type status_msg: string
        """
        return

    def warn(self, warn_msg):
        """Default logging handler. Expected to be overridden by
        the UI implementing subclass.

        @param warn_msg: warning message to print
        @type warn_msg: string
        """
        return

    def get_input(self, prompt):
        """Default input handler. Expected to be overridden by
        the UI implementing subclass.

        @param prompt: Prompt message
        @type prompt: string
        """
        return ""

    def auth(self):
        """Authenticate a session."""
        # check if we need to authenticate
        if self.authenticated:
            return

        # try seeing if we really need to request login
        if not self.forget:
            try:
                self.cookiejar.load()
            except IOError:
                pass

        req_url = urljoin(self.base, config.urls["auth"])
        req_url += "?GoAheadAndLogIn=1"
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        re_request_login = re.compile(r"<title>.*Log in to .*</title>")
        if not re_request_login.search(resp.read()):
            self.log("Already logged in.")
            self.authenticated = True
            return

        # prompt for username if we were not supplied with it
        if not self.user:
            self.log("No username given.")
            self.user = self.get_input("Username: ")

        # prompt for password if we were not supplied with it
        if not self.password:
            self.log("No password given.")
            self.password = getpass.getpass()

        # perform login
        qparams = config.params["auth"].copy()
        qparams["Bugzilla_login"] = self.user
        qparams["Bugzilla_password"] = self.password
        if not self.forget:
            qparams["Bugzilla_remember"] = "on"

        req_url = urljoin(self.base, config.urls["auth"])
        req = Request(req_url, urlencode(qparams), config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        if resp.info().has_key("Set-Cookie"):
            self.authenticated = True
            if not self.forget:
                self.cookiejar.save()
                os.chmod(self.cookiejar.filename, 0600)
            return True
        else:
            raise RuntimeError("Failed to login")

    def extractResults(self, resp):
        # parse the results into dicts.
        results = []
        columns = []
        rows = []

        for r in csv.reader(resp):
            rows.append(r)
        for field in rows[0]:
            if config.choices["column_alias"].has_key(field):
                columns.append(config.choices["column_alias"][field])
            else:
                self.log("Unknown field: " + field)
                columns.append(field)
        for row in rows[1:]:
            if "Missing Search" in row[0]:
                self.log("Bugzilla error (Missing search found)")
                return None
            fields = {}
            for i in range(min(len(row), len(columns))):
                fields[columns[i]] = row[i]
            results.append(fields)
        return results

    def search(self, query, comments=False, order="number", assigned_to=None,
               reporter=None, cc=None, commenter=None, whiteboard=None,
               keywords=None, status=[], severity=[], priority=[], product=[],
               component=[]):
        """Search bugzilla for a bug.

        @param query: query string to search in title or {comments}.
        @type query: string
        @param order: what order to returns bugs in.
        @type order: string

        @keyword assigned_to: email address which the bug is assigned to.
        @type assigned_to: string
        @keyword reporter: email address matching the bug reporter.
        @type reporter: string
        @keyword cc: email that is contained in the CC list
        @type cc: string
        @keyword commenter: email of a commenter.
        @type commenter: string
        @keyword whiteboard: string to search in status whiteboard (gentoo?)
        @type whiteboard: string
        @keyword keywords: keyword to search for
        @type keywords: string
        @keyword status: bug status to match. default is
                         ['NEW', 'ASSIGNED', 'REOPENED'].
        @type status: list
        @keyword severity: severity to match, empty means all.
        @type severity: list
        @keyword priority: priority levels to match, empty means all.
        @type priority: list
        @keyword comments: search comments instead of just bug title.
        @type comments: bool
        @keyword product: search within products. empty means all.
        @type product: list
        @keyword component: search within components. empty means all.
        @type component: list

        @return: list of bugs, each bug represented as a dict
        @rtype: list of dicts
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["list"].copy()
        qparams["value0-0-0"] = query
        if comments:
            qparams["type0-0-1"] = qparams["type0-0-0"]
            qparams["value0-0-1"] = query

        qparams["order"] = config.choices["order"].get(order, "Bug Number")
        qparams["bug_severity"] = severity or []
        qparams["priority"] = priority or []
        if not status:
            # NEW, ASSIGNED and REOPENED is obsolete as of bugzilla 3.x and has
            # been removed from bugs.gentoo.org on 2011/05/01
            qparams["bug_status"] = ["NEW", "ASSIGNED", "REOPENED",
                                     "UNCONFIRMED", "CONFIRMED", "IN_PROGRESS"]
        elif [s.upper() for s in status] == ["ALL"]:
            qparams["bug_status"] = config.choices["status"]
        else:
            qparams["bug_status"] = [s.upper() for s in status]
        qparams["product"] = product or ""
        qparams["component"] = component or ""
        qparams["status_whiteboard"] = whiteboard or ""
        qparams["keywords"] = keywords or ""

        # hoops to jump through for emails, since there are
        # only two fields, we have to figure out what combinations
        # to use if all three are set.
        unique = list(set([assigned_to, cc, reporter, commenter]))
        unique = [u for u in unique if u]
        if len(unique) < 3:
            for i in range(len(unique)):
                e = unique[i]
                n = i + 1
                qparams["email%d" % n] = e
                qparams["emailassigned_to%d" % n] = int(e == assigned_to)
                qparams["emailreporter%d" % n] = int(e == reporter)
                qparams["emailcc%d" % n] = int(e == cc)
                qparams["emaillongdesc%d" % n] = int(e == commenter)
        else:
            raise AssertionError("Cannot set assigned_to, cc, and "
                                 "reporter in the same query")

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["list"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        return self.extractResults(resp)

    def namedcmd(self, cmd):
        """Run command stored in Bugzilla by name.

        @return: Result from the stored command.
        @rtype: list of dicts
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["namedcmd"].copy()
        # Is there a better way of getting a command with a space in its name
        # to be encoded as foo%20bar instead of foo+bar or foo%2520bar?
        qparams["namedcmd"] = quote(cmd)
        req_params = urlencode(qparams, True)
        req_params = req_params.replace("%25", "%")

        req_url = urljoin(self.base, config.urls["list"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.user and self.password:
            base64string = base64.encodestring(
                "%s:%s" % (self.user, self.password))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        return self.extractResults(resp)

    def get(self, bugid):
        """Get an ElementTree representation of a bug.

        @param bugid: bug id
        @type bugid: int

        @rtype: ElementTree
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["show"].copy()
        qparams["id"] = bugid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["show"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        data = resp.read()
        # Get rid of control characters.
        data = re.sub("[\x00-\x08\x0e-\x1f\x0b\x0c]", "", data)
        fd = StringIO(data)

        # workaround for ill-defined XML templates in bugzilla 2.20.2
        (major_version, minor_version) = (sys.version_info[0],
                                          sys.version_info[1])
        if major_version > 2 or (major_version == 2 and minor_version >= 7):
            # If this is 2.7 or greater, then XMLTreeBuilder
            # does what we want.
            parser = ElementTree.XMLParser()
        else:
            # Running under Python 2.6, so we need to use our
            # subclass of XMLTreeBuilder instead.
            parser = ForcedEncodingXMLTreeBuilder(encoding="utf-8")

        etree = ElementTree.parse(fd, parser)
        bug = etree.find(".//bug")
        if bug is not None and bug.attrib.has_key("error"):
            return None
        else:
            return etree

    def modify(self, bugid, title=None, comment=None, url=None, status=None,
               resolution=None, assigned_to=None, duplicate=0, priority=None,
               severity=None, add_cc=[], remove_cc=[], add_dependson=[],
               remove_dependson=[], add_blocked=[], remove_blocked=[],
               whiteboard=None, keywords=None, component=None):
        """Modify an existing bug

        @param bugid: bug id
        @type bugid: int
        @keyword title: new title for bug
        @type title: string
        @keyword comment: comment to add
        @type comment: string
        @keyword url: new url
        @type url: string
        @keyword status: new status (note, if you are changing it to RESOLVED,
                         you need to set {resolution} as well.
        @type status: string
        @keyword resolution: new resolution (if status=RESOLVED)
        @type resolution: string
        @keyword assigned_to: email (needs to exist in bugzilla)
        @type assigned_to: string
        @keyword duplicate: bug id to duplicate against
                            (if resolution = DUPLICATE)
        @type duplicate: int
        @keyword priority: new priority for bug
        @type priority: string
        @keyword severity: new severity for bug
        @type severity: string
        @keyword add_cc: list of emails to add to the cc list
        @type add_cc: list of strings
        @keyword remove_cc: list of emails to remove from cc list
        @type remove_cc: list of string.
        @keyword add_dependson: list of bug ids to add to the depend list
        @type add_dependson: list of strings
        @keyword remove_dependson: list of bug ids to remove from depend list
        @type remove_dependson: list of strings
        @keyword add_blocked: list of bug ids to add to the blocked list
        @type add_blocked: list of strings
        @keyword remove_blocked: list of bug ids to remove from blocked list
        @type remove_blocked: list of strings
        @keyword whiteboard: set status whiteboard
        @type whiteboard: string
        @keyword keywords: set keywords
        @type keywords: string
        @keyword component: set component
        @type component: string

        @return: list of fields modified.
        @rtype: list of strings
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        buginfo = Bugz.get(self, bugid)
        if not buginfo:
            return False

        modified = []
        qparams = config.params["modify"].copy()
        qparams["id"] = bugid
        # NOTE: knob has been removed in bugzilla 4 and 3?
        qparams["knob"] = "none"

        # copy existing fields
        FIELDS = ("bug_file_loc", "bug_severity", "short_desc", "bug_status",
                  "status_whiteboard", "keywords", "resolution", "op_sys",
                  "priority", "version", "target_milestone", "assigned_to",
                  "rep_platform", "product", "component", "token")
        FIELDS_MULTI = ("blocked", "dependson")

        for field in FIELDS:
            try:
                qparams[field] = buginfo.find(".//%s" % field).text
                if qparams[field] is None:
                    del qparams[field]
            except:
                pass

        for field in FIELDS_MULTI:
            qparams[field] = [d.text for d in buginfo.findall(".//%s" % field)
                              if d is not None and d.text is not None]

        # set 'knob' if we are changing the status/resolution
        # or trying to reassign the bug.
        if status:
            status = status.upper()
        if resolution:
            resolution = resolution.upper()

        if status and status != qparams["bug_status"]:
            # Bugzilla >= 3.x
            qparams["bug_status"] = status
            if status == "RESOLVED":
                qparams["knob"] = "resolve"
                if resolution:
                    qparams["resolution"] = resolution
                else:
                    qparams["resolution"] = "FIXED"
                modified.append(("status", status))
                modified.append(("resolution", qparams["resolution"]))
            elif status == "ASSIGNED" or status == "IN_PROGRESS":
                qparams["knob"] = "accept"
                modified.append(("status", status))
            elif status == "REOPENED":
                qparams["knob"] = "reopen"
                modified.append(("status", status))
            elif status == "VERIFIED":
                qparams["knob"] = "verified"
                modified.append(("status", status))
            elif status == "CLOSED":
                qparams["knob"] = "closed"
                modified.append(("status", status))
        elif duplicate:
            # Bugzilla >= 3.x
            qparams["bug_status"] = "RESOLVED"
            qparams["resolution"] = "DUPLICATE"
            qparams["knob"] = "duplicate"
            qparams["dup_id"] = duplicate
            modified.append(("status", "RESOLVED"))
            modified.append(("resolution", "DUPLICATE"))
        elif assigned_to:
            qparams["knob"] = "reassign"
            qparams["assigned_to"] = assigned_to
            modified.append(("assigned_to", assigned_to))

        # setup modification of other bits
        if comment:
            qparams["comment"] = comment
            modified.append(("comment", ellipsis(comment, 60)))
        if title:
            qparams["short_desc"] = title or ""
            modified.append(("title", title))
        if url is not None:
            qparams["bug_file_loc"] = url
            modified.append(("url", url))
        if severity is not None:
            qparams["bug_severity"] = severity
            modified.append(("severity", severity))
        if priority is not None:
            qparams["priority"] = priority
            modified.append(("priority", priority))

        # cc manipulation
        if add_cc:
            qparams["newcc"] = ", ".join(add_cc)
            modified.append(("newcc", qparams["newcc"]))
        if remove_cc:
            qparams["cc"] = remove_cc
            qparams["removecc"] = "on"
            modified.append(("cc", remove_cc))

        # bug depend/blocked manipulation
        changed_dependson = False
        changed_blocked = False
        if remove_dependson:
            for bug_id in remove_dependson:
                qparams["dependson"].remove(str(bug_id))
                changed_dependson = True
        if remove_blocked:
            for bug_id in remove_blocked:
                qparams["blocked"].remove(str(bug_id))
                changed_blocked = True
        if add_dependson:
            for bug_id in add_dependson:
                qparams["dependson"].append(str(bug_id))
                changed_dependson = True
        if add_blocked:
            for bug_id in add_blocked:
                qparams["blocked"].append(str(bug_id))
                changed_blocked = True

        qparams["dependson"] = ",".join(qparams["dependson"])
        qparams["blocked"] = ",".join(qparams["blocked"])
        if changed_dependson:
            modified.append(("dependson", qparams["dependson"]))
        if changed_blocked:
            modified.append(("blocked", qparams["blocked"]))

        if whiteboard is not None:
            qparams["status_whiteboard"] = whiteboard
            modified.append(("status_whiteboard", whiteboard))
        if keywords is not None:
            qparams["keywords"] = keywords
            modified.append(("keywords", keywords))
        if component is not None:
            qparams["component"] = component
            modified.append(("component", component))

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["modify"])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)

        try:
            resp = self.opener.open(req)
            re_error = re.compile(r'id="error_msg".*>([^<]+)<')
            error = re_error.search(resp.read())
            if error:
                print error.group(1)
                return []
            return modified
        except:
            return []

    def attachment(self, attachid):
        """Get an attachment by attachment_id

        @param attachid: attachment id
        @type attachid: int

        @return: dict with three keys, 'filename', 'size', 'fd'
        @rtype: dict
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["attach"].copy()
        qparams["id"] = attachid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["attach"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        try:
            content_type = resp.info()["Content-type"]
            namefield = content_type.split(";")[1]
            filename = re.search(r"name=\"(.*)\"", namefield).group(1)
            content_length = int(resp.info()["Content-length"], 0)
            return {"filename": filename, "size": content_length, "fd": resp}
        except:
            return {}

    def post(self, product, component, title, description, url="",
             assigned_to="", cc="", keywords="", version="", dependson="",
             blocked="", priority="", severity=""):
        """Post a bug

        @param product: product where the bug should be placed
        @type product: string
        @param component: component where the bug should be placed
        @type component: string
        @param title: title of the bug.
        @type title: string
        @param description: description of the bug
        @type description: string
        @keyword url: optional url to submit with bug
        @type url: string
        @keyword assigned_to: optional email to assign bug to
        @type assigned_to: string.
        @keyword cc: optional list of CC'd emails
        @type cc: string
        @keyword keywords: optional list of bugzilla keywords
        @type keywords: string
        @keyword version: version of the component
        @type version: string
        @keyword dependson: bugs this one depends on
        @type dependson: string
        @keyword blocked: bugs this one blocks
        @type blocked: string
        @keyword priority: priority of this bug
        @type priority: string
        @keyword severity: severity of this bug
        @type severity: string

        @rtype: int
        @return: the bug number, or 0 if submission failed.
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["post"].copy()
        qparams["product"] = product
        qparams["component"] = component
        qparams["short_desc"] = title
        qparams["comment"] = description
        qparams["assigned_to"] = assigned_to
        qparams["cc"] = cc
        qparams["bug_file_loc"] = url
        qparams["dependson"] = dependson
        qparams["blocked"] = blocked
        qparams["keywords"] = keywords

        # XXX: default version is 'unspecified'
        if version != "":
            qparams["version"] = version

        # XXX: default priority is 'Normal'
        if priority != "":
            qparams["priority"] = priority

        # XXX: default severity is 'normal'
        if severity != "":
            qparams["bug_severity"] = severity

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["post"])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        try:
            re_bug = re.compile(
                r"(?:\s+)?<title>.*Bug ([0-9]+) Submitted.*</title>")
            bug_match = re_bug.search(resp.read())
            if bug_match:
                return int(bug_match.group(1))
        except:
            pass

        return 0

    def attach(self, bugid, title, description, filename,
               content_type="text/plain", ispatch=False):
        """Attach a file to a bug.

        @param bugid: bug id
        @type bugid: int
        @param title: short description of attachment
        @type title: string
        @param description: long description of the attachment
        @type description: string
        @param filename: filename of the attachment
        @type filename: string
        @keyword content_type: mime-type of the attachment
        @type content_type: string
        @keyword ispatch: if True, upload the attachment as a patch
        @type ispatch: bool

        @rtype: bool
        @return: True if successful, False if not successful.
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["attach_post"].copy()
        qparams["bugid"] = bugid
        qparams["description"] = title
        qparams["comment"] = description
        if ispatch:
            qparams["ispatch"] = "1"
            qparams["contenttypeentry"] = "text/plain"
        else:
            qparams["contenttypeentry"] = content_type

        filedata = [("data", filename, open(filename).read())]
        content_type, body = encode_multipart_formdata(qparams.items(),
                                                       filedata)

        req_headers = config.headers.copy()
        req_headers["Content-type"] = content_type
        req_headers["Content-length"] = len(body)
        req_url = urljoin(self.base, config.urls["attach_post"])
        req = Request(req_url, body, req_headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                "%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        # TODO: return attachment id and success?
        try:
            re_attach = re.compile(r"<title>(.+)</title>")
            # Bugzilla 3/4
            re_attach34 = re.compile(r"Attachment \d+ added to Bug \d+")
            response = resp.read()
            attach_match = re_attach.search(response)
            if attach_match:
                if (attach_match.group(1) == "Changes Submitted" or
                        re_attach34.match(attach_match.group(1))):
                    return True
                else:
                    return attach_match.group(1)
            else:
                return False
        except:
            pass

        return False
class ControlPlaneClient(object): """ """ def __init__(self, user, password, host=None, port=None): """ """ self._cj = CookieJar() self._opener = urllib2.build_opener( urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(self._cj) ) # Zproxy always provides a proxy to serviced on port 443 self._server = { "host": "127.0.0.1", "port": 443, } self._creds = {"username": user, "password": password} self._netloc = "%(host)s:%(port)s" % self._server self.cc_version = getCCVersion() self._hothOrNewer = False if self.cc_version == "1.1.X" else True self._useHttps = self._checkUseHttps() self._v2loc = "/api/v2" self._servicesEndpoint = "%s/services" % self._v2loc def _checkUseHttps(self): """ Starting in CC 1.2.0, port 443 in the containers does not support https. """ use_https = True cc_master = self._server.get("host") if self._hothOrNewer and cc_master in [ "localhost", "127.0.0.1" ]: use_https = False return use_https def queryServices(self, name=None, tags=None, tenantID=None): """ Returns a sequence of ServiceDefinition objects that match the given requirements. """ query = {} if name: namepat = fnmatch.translate(name) # controlplane regex accepts \z, not \Z. namepat = namepat.replace("\\Z", "\\z") query["name"] = namepat if tags: if isinstance(tags, (str, unicode)): tags = [tags] query["tags"] = ','.join(tags) if tenantID: query["tenantID"] = tenantID response = self._dorequest(self._servicesEndpoint, query=query) body = ''.join(response.readlines()) response.close() decoded = ServiceJsonDecoder().decode(body) if decoded is None: decoded = [] return decoded def getService(self, serviceId, default=None): """ Returns the ServiceDefinition object for the given service. """ response = self._dorequest("/services/%s" % serviceId) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def getChangesSince(self, age): """ Returns a sequence of ServiceDefinition objects that have changed within the given age. If there are no changes, and empty sequence is returned. :param age: How far back to look, in milliseconds, for changes. """ query = {"since": age} response = self._dorequest(self._servicesEndpoint, query=query) body = ''.join(response.readlines()) response.close() decoded = ServiceJsonDecoder().decode(body) if decoded is None: decoded = [] return decoded def updateServiceProperty(self, service, prop): """ Updates the launch property of a service. :param ServiceDefinition service: The modified definition """ oldService = self.getService(service.id) oldService._data[prop] = service._data[prop] body = ServiceJsonEncoder().encode(oldService) LOG.info("Updating prop '%s' for service '%s':%s resourceId=%s", prop, service.name, service.id, service.resourceId) LOG.debug("Updating service %s", body) response = self._dorequest( service.resourceId, method="PUT", data=body ) body = ''.join(response.readlines()) response.close() def updateService(self, service): """ Updates the definition/state of a service. 
:param ServiceDefinition service: The modified definition """ body = ServiceJsonEncoder().encode(service) LOG.info("Updating service '%s':%s", service.name, service.id) LOG.debug("Updating service %s", body) response = self._dorequest( service.resourceId, method="PUT", data=body ) body = ''.join(response.readlines()) response.close() def startService(self, serviceId): """ Start the given service :param string serviceId: The service to start """ LOG.info("Starting service '%s'", serviceId) response = self._dorequest("/services/%s/startService" % serviceId, method='PUT') body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def stopService(self, serviceId): """ Stop the given service :param string serviceId: The service to stop """ LOG.info("Stopping service %s", serviceId) response = self._dorequest("/services/%s/stopService" % serviceId, method='PUT') body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def addService(self, serviceDefinition): """ Add a new service :param string serviceDefinition: json encoded representation of service :returns string: json encoded representation of new service's links """ LOG.info("Adding service") LOG.debug(serviceDefinition) response = self._dorequest( "/services/add", method="POST", data=serviceDefinition ) body = ''.join(response.readlines()) response.close() return body def deleteService(self, serviceId): """ Delete a service :param string serviceId: Id of the service to delete """ LOG.info("Removing service %s", serviceId) response = self._dorequest( "/services/%s" % serviceId, method="DELETE" ) response.close() def deployService(self, parentId, service): """ Deploy a new service :param string parentId: parent service id :param string service: json encoded representation of service :returns string: json encoded representation of new service's links """ LOG.info("Deploying service") data = { 'ParentID': parentId, 'Service': json.loads(service) } LOG.debug(data) response = self._dorequest( "/services/deploy", method="POST", data=json.dumps(data) ) body = ''.join(response.readlines()) response.close() return body def queryServiceInstances(self, serviceId): """ Returns a sequence of ServiceInstance objects. """ response = self._dorequest("/services/%s/running" % serviceId) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def queryServiceStatus(self, serviceId): """ CC version-independent call to get the status of a service. Calls queryServiceStatusImpl or queryServiceInstancesV2 to get the status for serviceId. :param serviceId: The serviceId to get the status of :type serviceId: string :returns: The result of the query decoded :rtype: dict of ServiceStatus objects with ID as key """ if self._hothOrNewer: raw = self.queryServiceInstancesV2(serviceId) decoded = self._convertInstancesV2ToStatuses(raw) else: decoded = self.queryServiceStatusImpl(serviceId) return decoded def queryServiceStatusImpl(self, serviceId): """ Implementation for queryServiceStatus that uses the /services/:serviceid/status endpoint.
:param serviceId: The serviceId to get the status of :type serviceId: string :returns: The result of the query decoded :rtype: dict of ServiceStatus objects with ID as key """ response = self._dorequest("/services/%s/status" % serviceId) body = ''.join(response.readlines()) response.close() decoded = ServiceStatusJsonDecoder().decode(body) return decoded def queryServiceInstancesV2(self, serviceId): """ Uses the CC V2 api to query the instances of serviceId. :param serviceId: The serviceId to get the instances of :type serviceId: string :returns: The raw result of the query :rtype: json formatted string """ response = self._dorequest("%s/services/%s/instances" % (self._v2loc, serviceId)) body = ''.join(response.readlines()) response.close() return body def _convertInstancesV2ToStatuses(self, rawV2Instance): """ Converts a list of raw Instance (V2) json to a dict of ServiceStatuses. This is for compatibility's sake. :param rawV2Instance: The result from a call to queryServiceInstancesV2 :type rawV2Instance: json formatted string :returns: An acceptable output from queryServiceStatus :rtype: dict of ServiceStatus objects with ID as key """ decoded = InstanceV2ToServiceStatusJsonDecoder().decode(rawV2Instance) # V2 gives us a list, we need a dict with ID as key decoded = {instance.id: instance for instance in decoded} return decoded def queryHosts(self): """ Returns a sequence of Host objects. """ response = self._dorequest("/hosts") body = ''.join(response.readlines()) response.close() return HostJsonDecoder().decode(body) def getHost(self, hostId): """ Returns the Host object for the given hostId. """ response = self._dorequest("/hosts/%s" % hostId) body = ''.join(response.readlines()) response.close() return HostJsonDecoder().decode(body) def getInstance(self, serviceId, instanceId, default=None): """ Returns the requested ServiceInstance object.
""" response = self._dorequest( "/services/%s/running/%s" % (serviceId, instanceId) ) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def getServiceLog(self, serviceId, start=0, end=None): """ """ response = self._dorequest("/services/%s/logs" % serviceId) body = ''.join(response.readlines()) response.close() log = json.loads(body) return log["Detail"] def getInstanceLog(self, serviceId, instanceId, start=0, end=None): """ """ response = self._dorequest( "/services/%s/%s/logs" % (serviceId, instanceId) ) body = ''.join(response.readlines()) response.close() log = json.loads(body) return str(log["Detail"]) def killInstance(self, hostId, uuid): """ """ response = self._dorequest( "/hosts/%s/%s" % (hostId, uuid), method="DELETE" ) response.close() def getServicesForMigration(self, serviceId): """ """ query = {"includeChildren": "true"} response = self._dorequest("/services/%s" % serviceId, query=query) body = ''.join(response.readlines()) response.close() return json.loads(body) def postServicesForMigration(self, data, serviceId): """ """ response = self._dorequest( "/services/%s/migrate" % serviceId, method="POST", data=data ) body = ''.join(response.readlines()) response.close() return body def getPoolsData(self): """ Get all the pools and return raw json """ response = self._dorequest("/pools") body = ''.join(response.readlines()) response.close() return body def getHostsData(self): """ Get all the pools and return raw json """ response = self._dorequest("/hosts") body = ''.join(response.readlines()) response.close() return body def getRunningServicesData(self): """ Get all the running services and return raw json """ body = '' if not self._hothOrNewer: response = self._dorequest("/running") body = ''.join(response.readlines()) response.close() else: hostsData = self.queryHosts() for hostID in hostsData: response = self._dorequest("/hosts/%s/running" %hostID) body = body + ''.join(response.readlines()) response.close() return body def getStorageData(self): """ Get the storage information and return raw json """ response = self._dorequest("/storage") body = ''.join(response.readlines()) response.close() return body def _makeRequest(self, uri, method=None, data=None, query=None): query = urllib.urlencode(query) if query else "" url = urlunparse(("https" if self._useHttps else "http", self._netloc, uri, "", query, "")) args = {} if method: args["method"] = method if data: args["data"] = data args["headers"] = {"Content-Type": "application/json"} return _Request(url, **args) def _login(self): # Clear the cookie jar before logging in. self._cj.clear() encodedbody = json.dumps(self._creds) request = self._makeRequest("/login", data=encodedbody) response = self._opener.open(request) response.close() self._opener.close() def _dorequest(self, uri, method=None, data=None, query=None): # Try to perform the request up to five times for trycount in range(5): request = self._makeRequest(uri, method=method, data=data, query=query) try: return self._opener.open(request) except urllib2.HTTPError as ex: if ex.getcode() == 401: self._login() continue elif ex.getcode() == 500: # Make the exception prettier and reraise it try: msg = json.load(ex) except ValueError: raise ex # This stinks because we lose the stack detail = msg.get('Detail') if not detail: raise detail = detail.replace("Internal Server Error: ", "") raise ControlCenterError(detail) raise # The CC server resets the connection when an unauthenticated POST requesti is # made. 
Depending on when during the request lifecycle the connection is reset, # we can get either a URLError with a socket.error as the reason, or a naked # socket.error. In either case, the socket.error.errno indicates that the # connection was reset with an errno of ECONNRESET (104). # When we get a connection reset exception, assume that the reset was caused # by lack of authentication, login, and retry the request. except urllib2.URLError as ex: reason = ex.reason if type(reason) == socket_error and reason.errno == ECONNRESET: self._login() continue raise except socket_error as ex: if ex.errno == ECONNRESET: self._login() continue raise else: # break the loop so we skip the loop's else clause break else: # raises the last exception that was raised (the 401 error) raise def _get_cookie_jar(self): return self._cj def cookies(self): """ Get the cookie(s) being used. If the cookie/cookiejar implementation changes, this method should be revisited. Return a list of dicts of cookies of the form: { 'name': 'cookieName', 'value': 'cookieValue', 'domain': 'cookieDomain', 'path': 'cookiePath', 'expires': seconds from epoch to expire cookie, # leave blank to be a session cookie 'secure': False/True, } """ self._login() cookies = [] for cookie in self._get_cookie_jar(): cookies.append( { 'name': cookie.name, 'value': cookie.value, 'domain': cookie.domain, 'path': cookie.path, 'expires': cookie.expires, 'secure': cookie.secure } ) return cookies
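A minimal usage sketch for the client above (credentials are placeholders; this assumes serviced is reachable through the local Zproxy endpoint hard-coded in __init__):

client = ControlPlaneClient("admin", "secret")
# cookies() performs a login first, then dumps the jar as plain dicts
for cookie in client.cookies():
    print "%s=%s (domain=%s)" % (cookie["name"], cookie["value"], cookie["domain"])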
class Site(object): """ **EarwigBot: Wiki Toolset: Site** Represents a site, with support for API queries and returning :py:class:`~earwigbot.wiki.page.Page`, :py:class:`~earwigbot.wiki.user.User`, and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor takes a bunch of arguments and you probably won't need to call it directly, rather :py:meth:`wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>` for returning :py:class:`Site` instances, :py:meth:`wiki.add_site() <earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our database, and :py:meth:`wiki.remove_site() <earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from our database, should suffice. *Attributes:* - :py:attr:`name`: the site's name (or "wikiid"), like ``"enwiki"`` - :py:attr:`project`: the site's project name, like ``"wikipedia"`` - :py:attr:`lang`: the site's language code, like ``"en"`` - :py:attr:`domain`: the site's web domain, like ``"en.wikipedia.org"`` - :py:attr:`url`: the site's URL, like ``"https://en.wikipedia.org"`` *Public methods:* - :py:meth:`api_query`: does an API query with kwargs as params - :py:meth:`sql_query`: does an SQL query and yields its results - :py:meth:`get_maxlag`: returns the internal database lag - :py:meth:`get_replag`: estimates the external database lag - :py:meth:`get_token`: gets a token for a specific API action - :py:meth:`namespace_id_to_name`: returns names associated with an NS id - :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name - :py:meth:`get_page`: returns a Page for the given title - :py:meth:`get_category`: returns a Category for the given title - :py:meth:`get_user`: returns a User object for the given name - :py:meth:`delegate`: controls when the API or SQL is used """ SERVICE_API = 1 SERVICE_SQL = 2 SPECIAL_TOKENS = [ "createaccount", "deleteglobalaccount", "login", "patrol", "rollback", "setglobalaccountstatus", "userrights", "watch" ] def __init__(self, name=None, project=None, lang=None, base_url=None, article_path=None, script_path=None, sql=None, namespaces=None, login=(None, None), oauth=None, cookiejar=None, user_agent=None, use_https=True, assert_edit=None, maxlag=None, wait_between_queries=2, logger=None, search_config=None): """Constructor for new Site instances. This probably isn't necessary to call yourself unless you're building a Site that's not in your config and you don't want to add it - normally all you need is wiki.get_site(name), which creates the Site for you based on your config file and the sites database. We accept a bunch of kwargs, but the only ones you really "need" are *base_url* and *script_path*; this is enough to figure out an API url. *login*, a tuple of (username, password), can be used to log in using the legacy BotPasswords system; otherwise, a dict of OAuth info should be provided to *oauth*. *cookiejar* will be used to store cookies, and we'll use a normal CookieJar if none is given. First, we'll store the given arguments as attributes, then set up our requests session. We'll load any of the attributes that weren't given from the API, and then log in if a username/pass was given and we aren't already logged in. 
""" # Attributes referring to site information, filled in by an API query # if they are missing (and an API url can be determined): self._name = name self._project = project self._lang = lang self._base_url = base_url self._article_path = article_path self._script_path = script_path self._namespaces = namespaces # Attributes used for API queries: self._use_https = use_https self._assert_edit = assert_edit self._maxlag = maxlag self._wait_between_queries = wait_between_queries self._max_retries = 6 self._last_query_time = 0 self._tokens = {} self._api_lock = RLock() self._api_info_cache = {"maxlag": 0, "lastcheck": 0} # Attributes used for SQL queries: if sql: self._sql_data = sql else: self._sql_data = {} self._sql_conn = None self._sql_lock = RLock() self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None} # Attribute used in copyright violation checks (see CopyrightMixIn): if search_config: self._search_config = search_config else: self._search_config = {} # Set up cookiejar and requests session for making API queries: if cookiejar is not None: self._cookiejar = cookiejar else: self._cookiejar = CookieJar() self._last_cookiejar_save = None if not user_agent: user_agent = constants.USER_AGENT # Set default UA self._oauth = oauth self._session = requests.Session() self._session.cookies = self._cookiejar self._session.headers["User-Agent"] = user_agent if oauth: self._session.auth = OAuth1(oauth["consumer_token"], oauth["consumer_secret"], oauth["access_token"], oauth["access_secret"]) # Set up our internal logger: if logger: self._logger = logger else: # Just set up a null logger to eat up our messages: self._logger = getLogger("earwigbot.wiki") self._logger.addHandler(NullHandler()) # Get all of the above attributes that were not specified as arguments: self._load_attributes() # If we have a name/pass and the API says we're not logged in, log in: self._login_info = name, password = login if not self._oauth and name and password: logged_in_as = self._get_username_from_cookies() if not logged_in_as or name.replace("_", " ") != logged_in_as: self._login(login) def __repr__(self): """Return the canonical string representation of the Site.""" res = ", ".join( ("Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", "base_url={_base_url!r}", "article_path={_article_path!r}", "script_path={_script_path!r}", "use_https={_use_https!r}", "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", "sql={_sql_data!r}", "login={0}", "oauth={1}", "user_agent={3!r}", "cookiejar={2})")) name, password = self._login_info login = "******".format(repr(name), "hidden" if password else None) oauth = "hidden" if self._oauth else None cookies = self._cookiejar.__class__.__name__ if hasattr(self._cookiejar, "filename"): cookies += "({0!r})".format(getattr(self._cookiejar, "filename")) else: cookies += "()" agent = self.user_agent return res.format(login, oauth, cookies, agent, **self.__dict__) def __str__(self): """Return a nice string representation of the Site.""" res = "<Site {0} ({1}:{2}) at {3}>" return res.format(self.name, self.project, self.lang, self.domain) def _unicodeify(self, value, encoding="utf8"): """Return input as unicode if it's not unicode to begin with.""" if isinstance(value, unicode): return value return unicode(value, encoding) def _urlencode_utf8(self, params): """Implement urllib.urlencode() with support for unicode input.""" enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s) args = [] for key, val in params.iteritems(): key = quote_plus(enc(key)) val 
= quote_plus(enc(val)) args.append(key + "=" + val) return "&".join(args) def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False, no_assert=False, ae_retry=True): """Do an API query with *params* as a dict of parameters. See the documentation for :py:meth:`api_query` for full implementation details. *tries*, *wait*, and *ignore_maxlag* are for maxlag; *no_assert* and *ae_retry* are for AssertEdit. """ since_last_query = time() - self._last_query_time # Throttling support if since_last_query < self._wait_between_queries: wait_time = self._wait_between_queries - since_last_query log = "Throttled: waiting {0} seconds".format(round(wait_time, 2)) self._logger.debug(log) sleep(wait_time) self._last_query_time = time() url, data = self._build_api_query(params, ignore_maxlag, no_assert) if "lgpassword" in params: self._logger.debug("{0} -> <hidden>".format(url)) elif len(data) > 1000: self._logger.debug("{0} -> {1}...".format(url, data[:997])) else: self._logger.debug("{0} -> {1}".format(url, data)) try: response = self._session.post(url, data=data) response.raise_for_status() except requests.RequestException as exc: raise exceptions.APIError("API query failed: {0}".format(exc)) return self._handle_api_result(response, params, tries, wait, ae_retry) def _request_csrf_token(self, params): """If possible, add a request for a CSRF token to an API query.""" if params.get("action") == "query": if params.get("meta"): if "tokens" not in params["meta"].split("|"): params["meta"] += "|tokens" else: params["meta"] = "tokens" if params.get("type"): if "csrf" not in params["type"].split("|"): params["type"] += "|csrf" def _build_api_query(self, params, ignore_maxlag, no_assert): """Given API query params, return the URL to query and POST data.""" if not self._base_url or self._script_path is None: e = "Tried to do an API query, but no API URL is known." raise exceptions.APIError(e) url = self.url + self._script_path + "/api.php" params["format"] = "json" # This is the only format we understand if self._assert_edit and not no_assert: # If requested, ensure that we're logged in params["assert"] = self._assert_edit if self._maxlag and not ignore_maxlag: # If requested, don't overload the servers: params["maxlag"] = self._maxlag if "csrf" not in self._tokens: # If we don't have a CSRF token, try to fetch one: self._request_csrf_token(params) data = self._urlencode_utf8(params) return url, data def _handle_api_result(self, response, params, tries, wait, ae_retry): """Given an API query response, attempt to return useful data.""" try: res = response.json() except ValueError: e = "API query failed: JSON could not be decoded." raise exceptions.APIError(e) if "warnings" in res: for name, value in res["warnings"].items(): try: warning = value["warnings"] except KeyError: try: warning = value["*"] except KeyError: warning = value self._logger.warning("API warning: %s: %s", name, warning) if self._should_save_cookiejar(): self._save_cookiejar() try: code = res["error"]["code"] info = res["error"]["info"] except (TypeError, KeyError): # If there's no error code/info, return if "query" in res and "tokens" in res["query"]: for name, token in res["query"]["tokens"].iteritems(): self._tokens[name.split("token")[0]] = token return res if code == "maxlag": # We've been throttled by the server if tries >= self._max_retries: e = "Maximum number of retries reached ({0})." 
raise exceptions.APIError(e.format(self._max_retries)) tries += 1 msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})' self._logger.info(msg.format(info, wait, tries, self._max_retries)) sleep(wait) return self._api_query(params, tries, wait * 2, ae_retry=ae_retry) elif code in ["assertuserfailed", "assertbotfailed"]: # AssertEdit if ae_retry and all(self._login_info) and not self._oauth: # Try to log in if we got logged out: self._login(self._login_info) if "token" in params: # Fetch a new one; this is invalid now params["token"] = self.get_token(params["action"]) return self._api_query(params, tries, wait, ae_retry=False) if not all(self._login_info) and not self._oauth: e = "Assertion failed, and no login info was provided." elif code == "assertbotfailed": e = "Bot assertion failed: we don't have a bot flag!" else: e = "User assertion failed due to an unknown issue. Cookie or OAuth problem?" raise exceptions.PermissionsError("AssertEdit: " + e) else: # Some unknown error occurred e = 'API query failed: got error "{0}"; server says: "{1}".' error = exceptions.APIError(e.format(code, info)) error.code, error.info = code, info raise error def _load_attributes(self, force=False): """Load data about our Site from the API. This function is called by __init__() when one of the site attributes was not given as a keyword argument. We'll do an API query to get the missing data, but only if there actually *is* missing data. Additionally, you can call this with *force* set to True to forcibly reload all attributes. """ # All attributes to be loaded, except _namespaces, which is a special # case because it requires additional params in the API query: attrs = [ self._name, self._project, self._lang, self._base_url, self._article_path, self._script_path ] params = {"action": "query", "meta": "siteinfo", "siprop": "general"} if not self._namespaces or force: params["siprop"] += "|namespaces|namespacealiases" with self._api_lock: result = self._api_query(params, no_assert=True) self._load_namespaces(result) elif all(attrs): # Everything is already specified and we're not told return # to force a reload, so do nothing else: # We're only loading attributes other than _namespaces with self._api_lock: result = self._api_query(params, no_assert=True) res = result["query"]["general"] self._name = res["wikiid"] self._project = res["sitename"].lower() self._lang = res["lang"] self._base_url = res["server"] self._article_path = res["articlepath"] self._script_path = res["scriptpath"] def _load_namespaces(self, result): """Fill self._namespaces with a dict of namespace IDs and names. Called by _load_attributes() with API data as *result* when self._namespaces was not given as an kwarg to __init__(). """ self._namespaces = {} for namespace in result["query"]["namespaces"].values(): ns_id = namespace["id"] name = namespace["*"] try: canonical = namespace["canonical"] except KeyError: self._namespaces[ns_id] = [name] else: if name != canonical: self._namespaces[ns_id] = [name, canonical] else: self._namespaces[ns_id] = [name] for namespace in result["query"]["namespacealiases"]: ns_id = namespace["id"] alias = namespace["*"] self._namespaces[ns_id].append(alias) def _get_cookie(self, name, domain): """Return the named cookie unless it is expired or doesn't exist.""" for cookie in self._cookiejar: if cookie.name == name and cookie.domain == domain: if cookie.is_expired(): break return cookie def _get_username_from_cookies(self): """Try to return our username based solely on cookies. 
First, we'll look for a cookie named self._name + "Token", like "enwikiToken". If it exists and isn't expired, we'll assume it's valid and try to return the value of the cookie self._name + "UserName" (like "enwikiUserName"). This should work fine on wikis without single-user login. If `enwikiToken` doesn't exist, we'll try to find a cookie named `centralauth_Token`. If this exists and is not expired, we'll try to return the value of `centralauth_User`. If we didn't get any matches, we'll return None. Our goal here isn't to return the most likely username, or what we *want* our username to be (for that, we'd do self._login_info[0]), but rather to get our current username without an unnecessary ?action=query&meta=userinfo API query. """ name = ''.join((self._name, "Token")) cookie = self._get_cookie(name, self.domain) if cookie: name = ''.join((self._name, "UserName")) user_name = self._get_cookie(name, self.domain) if user_name: return unquote_plus(user_name.value) for cookie in self._cookiejar: if cookie.name != "centralauth_Token" or cookie.is_expired(): continue base = cookie.domain if base.startswith(".") and not cookie.domain_initial_dot: base = base[1:] if self.domain.endswith(base): user_name = self._get_cookie("centralauth_User", cookie.domain) if user_name: return unquote_plus(user_name.value) def _get_username_from_api(self): """Do a simple API query to get our username and return it. This is a reliable way to make sure we are actually logged in, because it doesn't deal with annoying cookie logic, but it results in an API query that is unnecessary in some cases. Called by _get_username() (in turn called by get_user() with no username argument) when cookie lookup fails, probably indicating that we are logged out. """ result = self.api_query(action="query", meta="userinfo") return result["query"]["userinfo"]["name"] def _get_username(self): """Return the name of the current user, whether logged in or not. First, we'll try to deduce it solely from cookies, to avoid an unnecessary API query. For the cookie-detection method, see _get_username_from_cookies()'s docs. If our username isn't in cookies, then we're either using OAuth or we're probably not logged in, or something fishy is going on (like forced logout). If we're using OAuth and a username was configured, assume it is accurate and use it. Otherwise, do a single API query for our username (or IP address) and return that. """ name = self._get_username_from_cookies() if name: return name if self._oauth and self._login_info[0]: return self._login_info[0] return self._get_username_from_api() def _should_save_cookiejar(self): """Return a bool indicating whether we should save the cookiejar. This is True if we haven't saved the cookiejar yet this session, or if our last save was over a day ago. """ max_staleness = 60 * 60 * 24 # 1 day if not self._last_cookiejar_save: return True return time() - self._last_cookiejar_save > max_staleness def _save_cookiejar(self): """Try to save our cookiejar after doing a (normal) login or logout. Calls the standard .save() method with no filename. Don't fret if our cookiejar doesn't support saving (CookieJar raises AttributeError, FileCookieJar raises NotImplementedError) or no default filename was given (LWPCookieJar and MozillaCookieJar raise ValueError). """ if hasattr(self._cookiejar, "save"): try: getattr(self._cookiejar, "save")() except (NotImplementedError, ValueError): pass self._last_cookiejar_save = time() def _login(self, login): """Safely login through the API. 
Normally, this is called by __init__() if a username and password have been provided and no valid login cookies were found. The only other time it needs to be called is when those cookies expire, which is done automatically by api_query() if a query fails. *login* is a (username, password) tuple. Raises LoginError on login errors (duh), like bad passwords and nonexistent usernames. """ self._tokens.clear() name, password = login params = {"action": "query", "meta": "tokens", "type": "login"} with self._api_lock: result = self._api_query(params, no_assert=True) try: token = result["query"]["tokens"]["logintoken"] except KeyError: raise exceptions.LoginError("Couldn't get login token") params = { "action": "login", "lgname": name, "lgpassword": password, "lgtoken": token } with self._api_lock: result = self._api_query(params, no_assert=True) res = result["login"]["result"] if res == "Success": self._tokens.clear() self._save_cookiejar() return if res == "Illegal": e = "The provided username is illegal." elif res == "NotExists": e = "The provided username does not exist." elif res == "EmptyPass": e = "No password was given." elif res == "WrongPass" or res == "WrongPluginPass": e = "The given password is incorrect." else: e = "Couldn't login; server says '{0}'.".format(res) raise exceptions.LoginError(e) def _logout(self): """Safely logout through the API. We'll do a simple API request (api.php?action=logout), clear our cookiejar (which probably contains now-invalidated cookies) and try to save it, if it supports that sort of thing. """ self.api_query(action="logout") self._cookiejar.clear() self._save_cookiejar() def _sql_connect(self, **kwargs): """Attempt to establish a connection with this site's SQL database. oursql.connect() will be called with self._sql_data as its kwargs. Any kwargs given to this function will be passed to connect() and will have precedence over the config file. Will raise SQLError() if the module "oursql" is not available. oursql may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot establish a connection. """ args = self._sql_data for key, value in kwargs.iteritems(): args[key] = value if "read_default_file" not in args and "user" not in args and "passwd" not in args: args["read_default_file"] = expanduser("~/.my.cnf") elif "read_default_file" in args: args["read_default_file"] = expanduser(args["read_default_file"]) if "autoping" not in args: args["autoping"] = True if "autoreconnect" not in args: args["autoreconnect"] = True try: self._sql_conn = oursql.connect(**args) except ImportError: e = "SQL querying requires the 'oursql' package: http://packages.python.org/oursql/" raise exceptions.SQLError(e) def _get_service_order(self): """Return a preferred order for using services (e.g. the API and SQL). A list is returned, starting with the most preferred service first and ending with the least preferred one. Currently, there are only two services. SERVICE_API will always be included since the API is expected to be always usable. In normal circumstances, self.SERVICE_SQL will be first (with the API second), since using SQL directly is easier on the servers than making web queries with the API. self.SERVICE_SQL will be second if replag is greater than three minutes (a cached value updated every two minutes at most), *unless* API lag is also very high. self.SERVICE_SQL will not be included in the list if we cannot form a proper SQL connection. 
""" now = time() if now - self._sql_info_cache["lastcheck"] > 120: self._sql_info_cache["lastcheck"] = now try: self._sql_info_cache["replag"] = sqllag = self.get_replag() except (exceptions.SQLError, oursql.Error): self._sql_info_cache["usable"] = False return [self.SERVICE_API] self._sql_info_cache["usable"] = True else: if not self._sql_info_cache["usable"]: return [self.SERVICE_API] sqllag = self._sql_info_cache["replag"] if sqllag > 300: if not self._maxlag: return [self.SERVICE_API, self.SERVICE_SQL] if now - self._api_info_cache["lastcheck"] > 300: self._api_info_cache["lastcheck"] = now try: self._api_info_cache["maxlag"] = apilag = self.get_maxlag() except exceptions.APIError: self._api_info_cache["maxlag"] = apilag = 0 else: apilag = self._api_info_cache["maxlag"] if apilag > self._maxlag: return [self.SERVICE_SQL, self.SERVICE_API] return [self.SERVICE_API, self.SERVICE_SQL] return [self.SERVICE_SQL, self.SERVICE_API] @property def name(self): """The Site's name (or "wikiid" in the API), like ``"enwiki"``.""" return self._name @property def project(self): """The Site's project name in lowercase, like ``"wikipedia"``.""" return self._project @property def lang(self): """The Site's language code, like ``"en"`` or ``"es"``.""" return self._lang @property def domain(self): """The Site's web domain, like ``"en.wikipedia.org"``.""" return urlparse(self._base_url).netloc @property def url(self): """The Site's full base URL, like ``"https://en.wikipedia.org"``.""" url = self._base_url if url.startswith("//"): # Protocol-relative URLs from 1.18 if self._use_https: url = "https:" + url else: url = "http:" + url return url @property def user_agent(self): """The User-Agent header sent to the API by the requests session.""" return self._session.headers["User-Agent"] def api_query(self, **kwargs): """Do an API query with `kwargs` as the parameters. This will first attempt to construct an API url from :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need both of these, or else we'll raise :py:exc:`~earwigbot.exceptions.APIError`. If :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki 1.18), we'll choose HTTPS only if :py:attr:`self._user_https` is ``True``, otherwise HTTP. We'll encode the given params, adding ``format=json`` along the way, as well as ``&assert=`` and ``&maxlag=`` based on :py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively. Additionally, we'll sleep a bit if the last query was made fewer than :py:attr:`self._wait_between_queries` seconds ago. The request is made through :py:attr:`self._session`, which has cookie support (:py:attr:`self._cookiejar`) and a ``User-Agent`` (:py:const:`earwigbot.wiki.constants.USER_AGENT`). Assuming everything went well, we'll gunzip the data (if compressed), load it as a JSON object, and return it. If our request failed for some reason, we'll raise :py:exc:`~earwigbot.exceptions.APIError` with details. If that reason was due to maxlag, we'll sleep for a bit and then repeat the query until we exceed :py:attr:`self._max_retries`. There is helpful MediaWiki API documentation at `MediaWiki.org <https://www.mediawiki.org/wiki/API>`_. """ with self._api_lock: return self._api_query(kwargs) def sql_query(self, query, params=(), plain_query=False, dict_cursor=False, cursor_class=None, show_table=False, buffsize=1024): """Do an SQL query and yield its results. If *plain_query* is ``True``, we will force an unparameterized query. Specifying both *params* and *plain_query* will cause an error. 
If *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as our cursor, otherwise the default :py:class:`oursql.Cursor`. If *cursor_class* is given, it will override this option. If *show_table* is True, the name of the table will be prepended to the name of the column. This will mainly affect an :py:class:`~oursql.DictCursor`. *buffsize* is the size of each memory-buffered group of results, to reduce the number of conversations with the database; it is passed to :py:meth:`cursor.fetchmany() <oursql.Cursor.fetchmany>`. If set to ``0```, all results will be buffered in memory at once (this uses :py:meth:`fetchall() <oursql.Cursor.fetchall>`). If set to ``1``, it is equivalent to using :py:meth:`fetchone() <oursql.Cursor.fetchone>`. Example usage:: >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?" >>> params = ("The Earwig",) >>> result1 = site.sql_query(query, params) >>> result2 = site.sql_query(query, params, dict_cursor=True) >>> for row in result1: print row (7418060L, '20080703215134') >>> for row in result2: print row {'user_id': 7418060L, 'user_registration': '20080703215134'} This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of oursql's exceptions (:py:exc:`oursql.ProgrammingError`, :py:exc:`oursql.InterfaceError`, ...) if there were problems with the query. See :py:meth:`_sql_connect` for information on how a connection is acquired. Also relevant is `oursql's documentation <http://packages.python.org/oursql>`_ for details on that package. """ if not cursor_class: if dict_cursor: cursor_class = oursql.DictCursor else: cursor_class = oursql.Cursor klass = cursor_class with self._sql_lock: if not self._sql_conn: self._sql_connect() with self._sql_conn.cursor(klass, show_table=show_table) as cur: cur.execute(query, params, plain_query) if buffsize: while True: group = cur.fetchmany(buffsize) if not group: return for result in group: yield result for result in cur.fetchall(): yield result def get_maxlag(self, showall=False): """Return the internal database replication lag in seconds. In a typical setup, this function returns the replication lag *within* the WMF's cluster, *not* external replication lag affecting the Toolserver (see :py:meth:`get_replag` for that). This is useful when combined with the ``maxlag`` API query param (added by config), in which queries will be halted and retried if the lag is too high, usually above five seconds. With *showall*, will return a list of the lag for all servers in the cluster, not just the one with the highest lag. """ params = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"} if showall: params["sishowalldb"] = 1 with self._api_lock: result = self._api_query(params, ignore_maxlag=True) if showall: return [server["lag"] for server in result["query"]["dbrepllag"]] return result["query"]["dbrepllag"][0]["lag"] def get_replag(self): """Return the estimated external database replication lag in seconds. Requires SQL access. This function only makes sense on a replicated database (e.g. the Wikimedia Toolserver) and on a wiki that receives a large number of edits (ideally, at least one per second), or the result may be larger than expected, since it works by subtracting the current time from the timestamp of the latest recent changes event. This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of oursql's exceptions (:py:exc:`oursql.ProgrammingError`, :py:exc:`oursql.InterfaceError`, ...) if there were problems. 
""" query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM recentchanges ORDER BY rc_timestamp DESC LIMIT 1""" result = list(self.sql_query(query)) return int(result[0][0]) def get_token(self, action=None, force=False): """Return a token for a data-modifying API action. In general, this will be a CSRF token, unless *action* is in a special list of non-CSRF tokens. Tokens are cached for the session (until :meth:`_login` is called again); set *force* to ``True`` to force a new token to be fetched. Raises :exc:`.APIError` if there was an API issue. """ if action not in self.SPECIAL_TOKENS: action = "csrf" if action in self._tokens and not force: return self._tokens[action] res = self.api_query(action="query", meta="tokens", type=action) if action not in self._tokens: err = "Tried to fetch a {0} token, but API returned: {1}" raise exceptions.APIError(err.format(action, res)) return self._tokens[action] def namespace_id_to_name(self, ns_id, all=False): """Given a namespace ID, returns associated namespace names. If *all* is ``False`` (default), we'll return the first name in the list, which is usually the localized version. Otherwise, we'll return the entire list, which includes the canonical name. For example, this returns ``u"Wikipedia"`` if *ns_id* = ``4`` and *all* is ``False`` on ``enwiki``; returns ``[u"Wikipedia", u"Project", u"WP"]`` if *ns_id* = ``4`` and *all* is ``True``. Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID is not found. """ try: if all: return self._namespaces[ns_id] else: return self._namespaces[ns_id][0] except KeyError: e = "There is no namespace with id {0}.".format(ns_id) raise exceptions.NamespaceNotFoundError(e) def namespace_name_to_id(self, name): """Given a namespace name, returns the associated ID. Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored, because namespaces are assumed to be case-insensitive. Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the name is not found. """ lname = name.lower() for ns_id, names in self._namespaces.items(): lnames = [n.lower() for n in names] # Be case-insensitive if lname in lnames: return ns_id e = u"There is no namespace with name '{0}'.".format(name) raise exceptions.NamespaceNotFoundError(e) def get_page(self, title, follow_redirects=False, pageid=None): """Return a :py:class:`Page` object for the given title. *follow_redirects* is passed directly to :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will return a :py:class:`~earwigbot.wiki.category.Category` object instead if the given title is in the category namespace. As :py:class:`~earwigbot.wiki.category.Category` is a subclass of :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems. Note that this doesn't do any direct checks for existence or redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods provide that. """ title = self._unicodeify(title) prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True) prefix = title.split(":", 1)[0] if prefix != title: # Avoid a page that is simply "Category" if prefix in prefixes: return Category(self, title, follow_redirects, pageid, self._logger) return Page(self, title, follow_redirects, pageid, self._logger) def get_category(self, catname, follow_redirects=False, pageid=None): """Return a :py:class:`Category` object for the given category name. *catname* should be given *without* a namespace prefix. This method is really just shorthand for :py:meth:`get_page("Category:" + catname) <get_page>`. 
""" catname = self._unicodeify(catname) prefix = self.namespace_id_to_name(constants.NS_CATEGORY) pagename = u':'.join((prefix, catname)) return Category(self, pagename, follow_redirects, pageid, self._logger) def get_user(self, username=None): """Return a :py:class:`User` object for the given username. If *username* is left as ``None``, then a :py:class:`~earwigbot.wiki.user.User` object representing the currently logged-in (or anonymous!) user is returned. """ if username: username = self._unicodeify(username) else: username = self._get_username() return User(self, username, self._logger) def delegate(self, services, args=None, kwargs=None): """Delegate a task to either the API or SQL depending on conditions. *services* should be a dictionary in which the key is the service name (:py:attr:`self.SERVICE_API <SERVICE_API>` or :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`), and the value is the function to call for this service. All functions will be passed the same arguments the tuple *args* and the dict *kwargs*, which are both empty by default. The service order is determined by :py:meth:`_get_service_order`. Not every service needs an entry in the dictionary. Will raise :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate service cannot be found. """ if not args: args = () if not kwargs: kwargs = {} order = self._get_service_order() for srv in order: if srv in services: try: return services[srv](*args, **kwargs) except exceptions.ServiceError: continue raise exceptions.NoServiceError(services)
class ControlPlaneClient(object): """Client for the Control Center (serviced) REST API.""" def __init__(self, user, password, host=None, port=None): """Initialize the client for the given serviced host and port.""" self._cj = CookieJar() self._opener = urllib2.build_opener( urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(self._cj) ) self._server = { "host": host if host else _DEFAULT_HOST, "port": port if port else _DEFAULT_PORT, } self._creds = {"username": user, "password": password} self._netloc = "%(host)s:%(port)s" % self._server def queryServices(self, name=None, tags=None, tenantID=None): """ Returns a sequence of ServiceDefinition objects that match the given requirements. """ query = {} if name: namepat = fnmatch.translate(name) # controlplane regex accepts \z, not \Z. namepat = namepat.replace("\\Z", "\\z") query["name"] = namepat if tags: if isinstance(tags, (str, unicode)): tags = [tags] query["tags"] = ','.join(tags) if tenantID: query["tenantID"] = tenantID response = self._dorequest("/services", query=query) body = ''.join(response.readlines()) response.close() decoded = ServiceJsonDecoder().decode(body) if decoded is None: decoded = [] return decoded def getService(self, serviceId, default=None): """ Returns the ServiceDefinition object for the given service. """ response = self._dorequest("/services/%s" % serviceId) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def updateService(self, service): """ Updates the definition/state of a service. :param ServiceDefinition service: The modified definition """ body = ServiceJsonEncoder().encode(service) LOG.info("Updating service '%s':%s", service.name, service.id) LOG.debug("Updating service %s", body) response = self._dorequest( service.resourceId, method="PUT", data=body ) body = ''.join(response.readlines()) response.close() def startService(self, serviceId): """ Start the given service :param string serviceId: The service to start """ LOG.info("Starting service '%s'", serviceId) response = self._dorequest("/services/%s/startService" % serviceId, method='PUT') body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def stopService(self, serviceId): """ Stop the given service :param string serviceId: The service to stop """ LOG.info("Stopping service '%s'", serviceId) response = self._dorequest("/services/%s/stopService" % serviceId, method='PUT') body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def addService(self, serviceDefinition): """ Add a new service :param string serviceDefinition: json encoded representation of service :returns string: json encoded representation of new service's links """ LOG.info("Adding service") LOG.debug(serviceDefinition) response = self._dorequest( "/services/add", method="POST", data=serviceDefinition ) body = ''.join(response.readlines()) response.close() return body def deleteService(self, serviceId): """ Delete a service :param string serviceId: Id of the service to delete """ LOG.info("Removing service %s", serviceId) response = self._dorequest( "/services/%s" % serviceId, method="DELETE" ) response.close() def deployService(self, parentId, service): """ Deploy a new service :param string parentId: parent service id :param string service: json encoded representation of service :returns string: json encoded representation of new service's links """ LOG.info("Deploying service") data = { 'ParentID': parentId, 'Service': json.loads(service) } LOG.debug(data) response = self._dorequest( "/services/deploy", method="POST", data=json.dumps(data) )
body = ''.join(response.readlines()) response.close() return body def queryServiceInstances(self, serviceId): """ Returns a sequence of ServiceInstance objects. """ response = self._dorequest("/services/%s/running" % serviceId) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def queryServiceStatus(self, serviceId): """ Returns a sequence of ServiceStatus objects. """ response = self._dorequest("/services/%s/status" % serviceId) body = ''.join(response.readlines()) response.close() return ServiceStatusJsonDecoder().decode(body) def queryHosts(self): """ Returns a sequence of Host objects. """ response = self._dorequest("/hosts") body = ''.join(response.readlines()) response.close() return HostJsonDecoder().decode(body) def getHost(self, hostId): """ Returns the Host object for the given hostId. """ response = self._dorequest("/hosts/%s" % hostId) body = ''.join(response.readlines()) response.close() return HostJsonDecoder().decode(body) def getInstance(self, serviceId, instanceId, default=None): """ Returns the requested ServiceInstance object. """ response = self._dorequest( "/services/%s/running/%s" % (serviceId, instanceId) ) body = ''.join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def getServiceLog(self, serviceId, start=0, end=None): """ Returns the log for the given service. """ response = self._dorequest("/services/%s/logs" % serviceId) body = ''.join(response.readlines()) response.close() log = json.loads(body) return log["Detail"] def getInstanceLog(self, serviceId, instanceId, start=0, end=None): """ Returns the log for the given service instance. """ response = self._dorequest( "/services/%s/%s/logs" % (serviceId, instanceId) ) body = ''.join(response.readlines()) response.close() log = json.loads(body) return str(log["Detail"]) def killInstance(self, hostId, instanceId): """ Kills the given instance on the given host. """ response = self._dorequest( "/hosts/%s/%s" % (hostId, instanceId), method="DELETE" ) response.close() def _makeRequest(self, uri, method=None, data=None, query=None): query = urllib.urlencode(query) if query else "" url = urlunparse(("https", self._netloc, uri, "", query, "")) args = {} if method: args["method"] = method if data: args["data"] = data args["headers"] = {"Content-Type": "application/json"} return _Request(url, **args) def _login(self): # Clear the cookie jar before logging in. self._cj.clear() encodedbody = json.dumps(self._creds) request = self._makeRequest("/login", data=encodedbody) response = self._opener.open(request) response.close() self._opener.close() def _dorequest(self, uri, method=None, data=None, query=None): request = self._makeRequest( uri, method=method, data=data, query=query) # Try to perform the request up to five times for trycount in range(5): try: return self._opener.open(request) except urllib2.HTTPError as ex: if ex.getcode() == 401: self._login() continue elif ex.getcode() == 500: # Make the exception prettier and reraise it try: msg = json.load(ex) except ValueError: raise ex # This stinks because we lose the stack detail = msg.get('Detail') if not detail: raise detail = detail.replace("Internal Server Error: ", "") raise ControlCenterError(detail) raise else: # break the loop so we skip the loop's else clause break else: # raises the last exception that was raised (the 401 error) raise
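Both _dorequest implementations lean on Python's for/else: the else clause runs only when the loop finishes all five tries without hitting break, and in Python 2 a bare raise there re-raises the last exception (the 401). A self-contained sketch of the idiom; AuthError and do_request are stand-ins:

class AuthError(Exception):
    pass

attempts = []

def do_request():
    # stand-in for self._opener.open(request); fails the first two times
    attempts.append(1)
    if len(attempts) < 3:
        raise AuthError("401")
    return "ok"

for trycount in range(5):
    try:
        result = do_request()
    except AuthError:
        continue  # "re-login" and retry
    else:
        break     # success skips the loop's else clause
else:
    raise         # all tries failed: re-raise the last exception (Python 2)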
class Site(object): """Main point for which interaction with a MediaWiki API is made.""" GITHUB = "https://github.com/ceradon/cerabot" USER_AGENT = "Cerabot/{0!r} (wikibot; Python/{1!r}; {2!r})" USER_AGENT = USER_AGENT.format("0.1", pyv(), GITHUB) config = {"throttle":10, "maxlag":10, "max_retries":3} def __init__(self, name=None, base_url="//en.wikipedia.org", project=None, lang=None, namespaces={}, login=(None, None), secure=False, config=None, user_agent=None, article_path=None, script_path="/w"): self._name = name if not project and not lang: self._base_url = base_url self._project = None self._lang = None else: self._lang = lang self._project = project self._base_url = "http://{0!r}.{1!r}".format(self._lang, self._project) self._article_path = article_path self._script_path = script_path self._namespaces = namespaces if config: self._config = config else: self._config = self.config self._login_data = login self._secure = secure self._tokens = {} if user_agent: self._user_agent = user_agent else: self._user_agent = self.USER_AGENT self._throttle, self._maxlag, self._max_retries = self._config.values() self._last_query_time = 0 self.cookie_jar = CookieJar() self.api_lock = Lock() self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar)) self.opener.addheaders = [("User-Agent", self._user_agent), ("Accept-Encoding", "gzip")] if self._login_data[0] and self._login_data[1]: self.login(login) self._load() def urlencode(self, params): """Implement urllib.urlencode() with support for unicode input. Thanks to Earwig (Ben Kurtovic) for this code.""" enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s) args = [] for key, val in params.iteritems(): key = quote_plus(enc(key)) val = quote_plus(enc(val)) args.append(key + "=" + val) return "&".join(args) def _query(self, params, query_continue=False, tries=0, idle=5, non_stop=False, prefix=None): """Queries the site's API.""" last_query = time.time() - self._last_query_time if last_query < self._throttle: throttle = self._throttle - last_query print "Throttling: waiting {0} seconds".format(round(throttle, 2)) time.sleep(throttle) params.setdefault("maxlag", self._maxlag) params.setdefault("format", "json") params["continue"] = "" try: if type(prefix).__name__ in ["tuple", "list"]: for p in prefix: params[p + "limit"] = "max" else: params[prefix + "limit"] = "max" except TypeError: pass protocol = "https:" if self._secure else "http:" url = ''.join((protocol, self._base_url, self._script_path, "/api.php")) data = self.urlencode(params) try: reply = self.opener.open(url, data) except URLError as e: if hasattr(e, "code"): exc = "API query could not be completed: Error code: {0}" exc = exc.format(e.code) elif hasattr(e, "reason"): exc = "API query could not be completed. Reason: {0}" exc = exc.format(e.reason) else: exc = "API query could not be completed." 
raise exceptions.APIError(exc) result = reply.read() if reply.headers.get("Content-Encoding") == "gzip": stream = StringIO(result) zipper = gzip.GzipFile(fileobj=stream) result = zipper.read() try: res = json.loads(result) except ValueError: e = "API query failed: JSON could not be loaded" raise exceptions.APIError(e) try: code = res["error"]["code"] info = res["error"]["info"] except (TypeError, ValueError, KeyError): if "continue" in res and query_continue: continue_data = self._handle_query_continue(params, res, max_continues=5 if not non_stop else "max") res.update(continue_data) return res if code == "maxlag": if tries >= self._max_retries: e = "Maximum amount of allowed retries has been exhausted." raise exceptions.APIError(e) tries += 1 time.sleep(idle) return self._query(params, query_continue=query_continue, tries=tries, idle=idle*2, non_stop=non_stop, prefix=prefix) else: e = "An unknown error occurred. Here is the data from the API: {0}" return_data = "({0}, {1})".format(code, info) error = exceptions.APIError(e.format(return_data)) error.code, error.info = code, info raise error def _load(self, force=False): """Loads the site's attributes. Called automatically on initialization.""" attrs = [self._name, self._project, self._lang, self._base_url, self._script_path, self._article_path] query = {"action":"query", "meta":"siteinfo", "siprop":"general"} if not self._namespaces or force: query["siprop"] += "|namespaces|namespacealiases" result = self._query(query) for item in result["query"]["namespaces"].values(): ns_id = item["id"] name = item["*"] try: canonical = item["canonical"] except KeyError: self._namespaces[ns_id] = [name] else: if name != canonical: self._namespaces[ns_id] = [name, canonical] else: self._namespaces[ns_id] = [name] for item in result["query"]["namespacealiases"]: ns_id = item["id"] alias = item["*"] self._namespaces[ns_id].append(alias) elif all(attrs): return else: result = self.query(query) result = result["query"]["general"] self._name = result["wikiid"] self._project = result["sitename"].lower() self._lang = result["lang"] self._base_url = result["server"] self._script_path = result["scriptpath"] self._article_path = result["articlepath"] def _handle_query_continue(self, request, data, max_continues=5): """Handle 'query-continues' in API queries.""" all_data = {} count = 0 last_continue = {} if max_continues == "max": # I seriously doubt there will ever be this many continues max_continues = 10000 while "continue" in data and count < max_continues: query = deepcopy(request) query.update(last_continue) res = self._query(query) if "continue" in res: last_continue = res["continue"] try: if not all_data: all_data = res else: all_data.update(res) except (KeyError, IndexError): pass count += 1 data = res data.update(all_data) return data def page(self, title="", pageid=0, follow_redirects=False): """Returns an instance of Page for *title* with *follow_redirects* and *pageid* as arguments, unless *title* is a category, in which case a Category instance is returned.""" return Page(self, title, pageid, follow_redirects) def category(self, title="", pageid=0, follow_redirects=False): """Returns an instance of Category for *title* with *follow_redirects* and *pageid* as arguments.""" return Category(self, title, pageid, follow_redirects) def user(self, name=None): """Returns an instance of User for *name*.""" return User(name) def file(self, title, pageid=0, follow_redirects=False): """Returns an instance of File for *title* or *pageid*.""" return File(title, pageid, follow_redirects) @property def domain(self): """Returns the site's web
domain, like \"en.wikipedia.org\"""" return urlparse(self._base_url).netloc def get_username(self): """Gets the name of the user that is currently logged into the site's API. Simple way to ensure that we are logged in.""" data = self.query({"action":"query", "meta":"userinfo"}) return data["query"]["userinfo"]["name"] def get_cookies(self, name, domain): for cookie in self.cookie_jar: if cookie.name == name and cookie.domain == domain: if cookie.is_expired(): break return cookie def save_cookie_jar(self): """Attempts to save all changes to our cookiejar after a successful login or logout.""" if hasattr(self.cookie_jar, "save"): try: getattr(self._cookiejar, "save")() except (NotImplementedError, ValueError): pass def query(self, params, query_continue=False, non_stop=False, prefix=None): """Queries the site's API.""" with self.api_lock: i = self._query(params, query_continue, non_stop=non_stop, prefix=prefix) return i def _login(self, login, token=None, attempts=0): """Logs into the site's API.""" username, password = login if token: i = self.query({"action":"login", "lgname":username, "lgpassword":password, "lgtoken":token}) else: i = self.query({"action":"login", "lgname":username, "lgpassword":password}) res = i["login"]["result"] if res == "Success": self.save_cookie_jar() elif res == "NeedToken" and attempts == 0: token = i["login"]["token"] return self._login(login, token, attempts=1) else: if res == "Illegal": e = "The provided username is illegal." elif res == "NotExists": e = "The provided username does not exist." elif res == "EmptyPass": e = "No password was given." elif res == "WrongPass" or res == "WrongPluginPass": e = "The given password is incorrect." else: e = "An unknown error occured, API responded with {0)." e = e.format(res) raise exceptions.APILoginError(e) def login(self, login): """Public method for logging in to the API.""" if not login: if self._login[0]: login = self._login else: e = "No login data or insufficient data provided." raise exceptions.APILoginError(e) if type(login) == tuple: self._login(login) else: e = "Login data must be in tuple format, got {0}" raise exceptions.APILoginError(e.format(type(login))) def logout(self): """Attempts to logout out the API and clear the cookie jar.""" self.query({"action":"logout"}) self.cookie_jar.clear() self.save_cookie_jar() def tokener(self, args=[]): i = re.compile("Action (.*?) 
is not allowed for the current user") valid_args = ["block", "delete", "edit", "email", "import", "move", "options", "patrol", "protect", "unblock", "watch"] if not args: args = valid_args if self._tokens: m = {} for token in args: try: m[token] = self._tokens[token] except (KeyError, IndexError): m[token] = None continue return m if not type(args) == list: return query = {"action":"query", "prop":"info", "titles":"Main Page", "intoken":"|".join(args)} result = self.query(query) res = result["query"]["pages"] _tokens = {} c = res.keys()[0] possible_tokens = res[c] for key, val in possible_tokens.items(): if key.endswith("token"): name = key[:key.find("token")] _tokens[name] = val args.pop(args.index(name)) if "warnings" in result: a = result["warnings"]["info"]["*"].split("\n") if len(a) > 1: a = [b for b in a if b.lower().startswith("action")] for item in a: name = i.findall(item) name = name[0].strip("'") _tokens[name] = None self._tokens.update(_tokens) return _tokens def iterator(self, **kwargs): """Iterates over result of api query with *kwargs* as arguments and returns a generator.""" result = None if "action" in kwargs.keys(): kwargs.pop("action", 0) kwargs["action"] = "query" res = self.query(kwargs) if "warnings" in res: e = "Unknown error occured while attempting iterator query." e += " Got back: {0}".format(res) raise exceptions.APIError(e) if len(res["query"]) > 1: result = {} a = {} b = list(res["query"]) for key, val in res["query"].items(): a[key] = val while len(b) > 0: key = b.pop(0, False) if not key: break results[key] = itertools.chain(a[key]) elif len(res["query"]) == 1: result = (i for i in res["query"][list(res["query"])[0]]) return result def name_to_id(self, name): """Returns the associated id to the namespace *name*.""" for ns_id, names in self._namespaces.items(): if name.lower() in [i.lower() for i in names]: return ns_id error = "No such namespace with name {0}." raise exceptions.APIError(error) def id_to_name(self, ns_id, get_all=False): """Returns the associated name to the namespace id *ns_id*.""" try: if get_all: return self._namespaces[ns_id] else: return self._namespaces[ns_id][0] except KeyError: error = "No such id with namespace {0}." raise exceptions.APIError(error) def __repr__(self): """Returns a coanonical string representation of Site.""" res = u"Site(name={0}, base_url={1}, project={2}, lang={3}, "+ \ "namespaces={4}, secure={5}, config={6}, article_path={7}"+ \ "script_path={8}, user_agent={9}".format(self._name, self._base_url, self._project, self._lang, self._namespaces, self._secure, unicode(self._config), self._article_path, self._script_path, self._user_agent) if self._login_data[0] and self._login_data[1]: res = res + ", username={0}, password=<hidden>".format( self._login_data[0]) return res def __str__(self): """Returns a prettier string representation of Site.""" res = u"<Site(site object %s (%s, %s) for site %s"+ \ " with user %s, config %s and user agent %s." if self._login_data[0]: res = res % (self._name, self._lang, self._project, self._base_url, self._login_data[0], unicode(self._config), self._user_agent) return res.replace("'", "") res = res.replace("user %s, ", "") res = res % (self._name, self._lang, self._project, self._base_url, unicode(self._config), self._user_agent) return res.replace("'", "")
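# The maxlag branch in _query() above is a general retry pattern: sleep,
# then try again with a doubled idle interval. Below is a standalone sketch
# of the same idea, assuming a hypothetical do_request() callable that
# returns the decoded JSON response as a dict and an illustrative
# MAX_RETRIES cap (neither is part of the class above).
import time

MAX_RETRIES = 3  # illustrative cap, playing the role of self._max_retries

def query_with_backoff(do_request, params, tries=0, idle=5):
    res = do_request(params)
    error = res.get("error") or {}
    if error.get("code") == "maxlag":
        if tries >= MAX_RETRIES:
            raise RuntimeError("Maximum amount of allowed retries exhausted.")
        time.sleep(idle)  # wait, then retry with a doubled interval
        return query_with_backoff(do_request, params, tries + 1, idle * 2)
    return res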
class GHttp(object):
    def __init__(self):
        """Class initialisation; creates the cookie jar and handlers."""
        self.lastpage = None
        self.lasterror = None
        self.cj = CookieJar()
        self.cookieH = urllib2.HTTPCookieProcessor(self.cj)
        self.redirectH = urllib2.HTTPRedirectHandler()
        self.proxyH = None
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)

    def addproxy(self, proxyipport):
        """Routes all requests through the given 'ip:port' HTTP proxy and
        verifies it with a test request."""
        self.proxyH = urllib2.ProxyHandler({'http': proxyipport})
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH,
                                           self.proxyH)
        if self.rq('http://google.com') is None:
            return False
        return True

    def removeproxy(self):
        """Removes the currently set proxy."""
        self.proxyH = None
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)

    def clearcookies(self):
        """Clears all cookies from the cookie jar :)"""
        self.cj.clear()

    def rq(self, url, ref=None, data=None):
        """HTTP request; returns the response HTML, or None on error.

        Keyword arguments:
        url  -- the url you want to request
        data -- data for the POST method; if omitted, a GET is sent
        ref  -- the referer for the request; if none is specified, the
                last page's url (or the current url) is used (default None)
        """
        # reset lasterror
        self.lasterror = None
        # set the referer
        if ref is None:
            if self.lastpage is None:
                self.lastpage = url
            ref = self.lastpage
        self.opener.addheaders = [
            ('Referer', ref),
            ('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) '
             'AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.98 '
             'Safari/534.13')]
        # if data is a dictionary, use urllib to encode it to url format
        if isinstance(data, dict):
            data = urllib.urlencode(data)
        # catch exceptions so the program does not crash
        try:
            if data is not None:
                opnr = self.opener.open(url, data=data)
            else:
                opnr = self.opener.open(url)
        except urllib2.HTTPError as e:
            self.lasterror = ('The server couldn\'t fulfill the request. '
                              'Error code: %s' % e.code)
            return None
        except urllib2.URLError as e:
            self.lasterror = 'We failed to reach a server. Reason: %s' % e.reason
            return None
        # success: remember the page and return the response body
        self.lastpage = url
        return opnr.read()
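# A short usage sketch for the GHttp wrapper above; the proxy address,
# URL, and form fields are all placeholders.
http = GHttp()
if not http.addproxy('127.0.0.1:8080'):  # hypothetical proxy
    http.removeproxy()  # fall back to a direct connection
html = http.rq('http://example.com/login',
               data={'user': 'alice', 'pw': 'secret'})  # dict data -> POST
if html is None:
    print http.lasterror
http.clearcookies()  # drop session cookies before the next run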
class Bugz:
    """ Converts sane method calls to Bugzilla HTTP requests.

    @ivar base: base url of bugzilla.
    @ivar user: username for authenticated operations.
    @ivar password: password for authenticated operations.
    @ivar cookiejar: for authenticated sessions so we only auth once.
    @ivar forget: forget user/password after session.
    @ivar authenticated: is this session authenticated already.
    """

    def __init__(self, base, user=None, password=None, forget=False,
                 skip_auth=False, httpuser=None, httppassword=None):
        """
        {user} and {password} will be prompted if an action needs them
        and they are not supplied. If {forget} is set, the login cookie
        will be destroyed on quit.

        @param base: base url of the bugzilla
        @type base: string
        @keyword user: username for authenticated actions.
        @type user: string
        @keyword password: password for authenticated actions.
        @type password: string
        @keyword forget: forget login session after termination.
        @type forget: bool
        @keyword skip_auth: do not authenticate
        @type skip_auth: bool
        @keyword httpuser: username for HTTP basic authentication
        @type httpuser: string
        @keyword httppassword: password for HTTP basic authentication
        @type httppassword: string
        """
        self.base = base
        scheme, self.host, self.path, query, frag = urlsplit(self.base)
        self.authenticated = False
        self.forget = forget

        if not self.forget:
            try:
                cookie_file = os.path.join(os.environ['HOME'], COOKIE_FILE)
                self.cookiejar = LWPCookieJar(cookie_file)
                if forget:
                    try:
                        self.cookiejar.load()
                        self.cookiejar.clear()
                        self.cookiejar.save()
                        os.chmod(self.cookiejar.filename, 0700)
                    except IOError:
                        pass
            except KeyError:
                self.warn('Unable to save session cookies in %s' % COOKIE_FILE)
                self.cookiejar = CookieJar()  # fall back to an in-memory jar
        else:
            self.cookiejar = CookieJar()

        self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
        self.user = user
        self.password = password
        self.httpuser = httpuser
        self.httppassword = httppassword
        self.skip_auth = skip_auth

    def log(self, status_msg):
        """Default logging handler. Expected to be overridden by the
        UI implementing subclass.

        @param status_msg: status message to print
        @type status_msg: string
        """
        return

    def warn(self, warn_msg):
        """Default warning handler. Expected to be overridden by the
        UI implementing subclass.

        @param warn_msg: warning message to print
        @type warn_msg: string
        """
        return

    def get_input(self, prompt):
        """Default input handler. Expected to be overridden by the
        UI implementing subclass.

        @param prompt: prompt message
        @type prompt: string
        """
        return ''

    def auth(self):
        """Authenticate a session."""
        # check if we need to authenticate
        if self.authenticated:
            return

        # try seeing if we really need to request login
        if not self.forget:
            try:
                self.cookiejar.load()
            except IOError:
                pass

        req_url = urljoin(self.base, config.urls['auth'])
        req_url += '?GoAheadAndLogIn=1'
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        re_request_login = re.compile(r'<title>.*Log in to Bugzilla</title>')
        if not re_request_login.search(resp.read()):
            self.log('Already logged in.')
            self.authenticated = True
            return

        # prompt for username if we were not supplied with it
        if not self.user:
            self.log('No username given.')
            self.user = self.get_input('Username: ')

        # prompt for password if we were not supplied with it
        if not self.password:
            self.log('No password given.')
            self.password = getpass.getpass()

        # perform login
        qparams = config.params['auth'].copy()
        qparams['Bugzilla_login'] = self.user
        qparams['Bugzilla_password'] = self.password
        if not self.forget:
            qparams['Bugzilla_remember'] = 'on'

        req_url = urljoin(self.base, config.urls['auth'])
        req = Request(req_url, urlencode(qparams), config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        if resp.info().has_key('Set-Cookie'):
            self.authenticated = True
            if not self.forget:
                self.cookiejar.save()
                os.chmod(self.cookiejar.filename, 0700)
            return True
        else:
            raise RuntimeError("Failed to login")

    def extractResults(self, resp):
        # parse the CSV results into a list of dicts
        results = []
        columns = []
        rows = []
        for r in csv.reader(resp):
            rows.append(r)
        for field in rows[0]:
            if config.choices['column_alias'].has_key(field):
                columns.append(config.choices['column_alias'][field])
            else:
                self.log('Unknown field: ' + field)
                columns.append(field)
        for row in rows[1:]:
            if row[0].find("Missing Search") != -1:
                self.log('Bugzilla error (Missing search found)')
                return None
            fields = {}
            for i in range(min(len(row), len(columns))):
                fields[columns[i]] = row[i]
            results.append(fields)
        return results

    def search(self, query, comments=False, order='number', assigned_to=None,
               reporter=None, cc=None, commenter=None, whiteboard=None,
               keywords=None, status=[], severity=[], priority=[],
               product=[], component=[]):
        """Search bugzilla for a bug.

        @param query: query string to search in title or {comments}.
        @type query: string
        @param order: what order to return bugs in.
        @type order: string
        @keyword assigned_to: email address which the bug is assigned to.
        @type assigned_to: string
        @keyword reporter: email address matching the bug reporter.
        @type reporter: string
        @keyword cc: email that is contained in the CC list.
        @type cc: string
        @keyword commenter: email of a commenter.
        @type commenter: string
        @keyword whiteboard: string to search in status whiteboard (gentoo?)
        @type whiteboard: string
        @keyword keywords: keyword to search for
        @type keywords: string
        @keyword status: bug status to match. default is
            ['NEW', 'ASSIGNED', 'REOPENED'].
        @type status: list
        @keyword severity: severity to match, empty means all.
        @type severity: list
        @keyword priority: priority levels to match, empty means all.
        @type priority: list
        @keyword comments: search comments instead of just bug title.
        @type comments: bool
        @keyword product: search within products. empty means all.
        @type product: list
        @keyword component: search within components. empty means all.
        @type component: list
        @return: list of bugs, each bug represented as a dict
        @rtype: list of dicts
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['list'].copy()
        if comments:
            qparams['long_desc'] = query
        else:
            qparams['short_desc'] = query
        qparams['order'] = config.choices['order'].get(order, 'Bug Number')
        qparams['bug_severity'] = severity or []
        qparams['priority'] = priority or []
        if status is None:
            qparams['bug_status'] = ['NEW', 'ASSIGNED', 'REOPENED']
        elif [s.upper() for s in status] == ['ALL']:
            qparams['bug_status'] = config.choices['status']
        else:
            qparams['bug_status'] = [s.upper() for s in status]
        qparams['product'] = product or ''
        qparams['component'] = component or ''
        qparams['status_whiteboard'] = whiteboard or ''
        qparams['keywords'] = keywords or ''

        # Hoops to jump through for the email fields: since there are only
        # two of them, we have to figure out which combinations to use if
        # more are set.
        unique = list(set([assigned_to, cc, reporter, commenter]))
        unique = [u for u in unique if u]
        if len(unique) < 3:
            for i in range(len(unique)):
                e = unique[i]
                n = i + 1
                qparams['email%d' % n] = e
                qparams['emailassigned_to%d' % n] = int(e == assigned_to)
                qparams['emailreporter%d' % n] = int(e == reporter)
                qparams['emailcc%d' % n] = int(e == cc)
                qparams['emaillongdesc%d' % n] = int(e == commenter)
        else:
            raise AssertionError('Cannot set assigned_to, cc, and '
                                 'reporter in the same query')

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['list'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        return self.extractResults(resp)

    def namedcmd(self, cmd):
        """Run a command stored in Bugzilla by name.

        @return: Result from the stored command.
        @rtype: list of dicts
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['namedcmd'].copy()
        # Is there a better way of getting a command with a space in its
        # name to be encoded as foo%20bar instead of foo+bar or foo%2520bar?
        qparams['namedcmd'] = quote(cmd)
        req_params = urlencode(qparams, True)
        req_params = req_params.replace('%25', '%')

        req_url = urljoin(self.base, config.urls['list'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        return self.extractResults(resp)

    def get(self, bugid):
        """Get an ElementTree representation of a bug.

        @param bugid: bug id
        @type bugid: int
        @rtype: ElementTree
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['show'].copy()
        qparams['id'] = bugid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['show'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        fd = StringIO(resp.read())
        # workaround for ill-defined XML templates in bugzilla 2.20.2
        parser = ForcedEncodingXMLTreeBuilder(encoding='utf-8')
        etree = ElementTree.parse(fd, parser)
        bug = etree.find('.//bug')
        if bug is not None and bug.attrib.has_key('error'):
            return None
        else:
            return etree

    def modify(self, bugid, title=None, comment=None, url=None, status=None,
               resolution=None, assigned_to=None, duplicate=0, priority=None,
               severity=None, add_cc=[], remove_cc=[], add_dependson=[],
               remove_dependson=[], add_blocked=[], remove_blocked=[],
               whiteboard=None, keywords=None):
        """Modify an existing bug.

        @param bugid: bug id
        @type bugid: int
        @keyword title: new title for bug
        @type title: string
        @keyword comment: comment to add
        @type comment: string
        @keyword url: new url
        @type url: string
        @keyword status: new status (note: if you are changing it to
            RESOLVED, you need to set {resolution} as well.)
        @type status: string
        @keyword resolution: new resolution (if status=RESOLVED)
        @type resolution: string
        @keyword assigned_to: email (needs to exist in bugzilla)
        @type assigned_to: string
        @keyword duplicate: bug id to duplicate against
            (if resolution=DUPLICATE)
        @type duplicate: int
        @keyword priority: new priority for bug
        @type priority: string
        @keyword severity: new severity for bug
        @type severity: string
        @keyword add_cc: list of emails to add to the cc list
        @type add_cc: list of strings
        @keyword remove_cc: list of emails to remove from the cc list
        @type remove_cc: list of strings
        @keyword add_dependson: list of bug ids to add to the depend list
        @type add_dependson: list of strings
        @keyword remove_dependson: list of bug ids to remove from depend list
        @type remove_dependson: list of strings
        @keyword add_blocked: list of bug ids to add to the blocked list
        @type add_blocked: list of strings
        @keyword remove_blocked: list of bug ids to remove from blocked list
        @type remove_blocked: list of strings
        @keyword whiteboard: set status whiteboard
        @type whiteboard: string
        @keyword keywords: set keywords
        @type keywords: string
        @return: list of fields modified.
        @rtype: list of strings
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        buginfo = Bugz.get(self, bugid)
        if not buginfo:
            return False

        modified = []
        qparams = config.params['modify'].copy()
        qparams['id'] = bugid
        qparams['knob'] = 'none'

        # copy existing fields
        FIELDS = ('bug_file_loc', 'bug_severity', 'short_desc', 'bug_status',
                  'status_whiteboard', 'keywords', 'op_sys',
                  'priority', 'version', 'target_milestone',
                  'assigned_to', 'rep_platform', 'product', 'component')
        FIELDS_MULTI = ('blocked', 'dependson')
        for field in FIELDS:
            try:
                qparams[field] = buginfo.find('.//%s' % field).text
            except AttributeError:
                pass
        for field in FIELDS_MULTI:
            qparams[field] = [d.text for d in
                              buginfo.findall('.//%s' % field)]

        # set 'knob' if we are changing the status/resolution
        # or trying to reassign the bug.
        if status:
            status = status.upper()
        if resolution:
            resolution = resolution.upper()
        if status == 'RESOLVED' and status != qparams['bug_status']:
            qparams['knob'] = 'resolve'
            if resolution:
                qparams['resolution'] = resolution
            else:
                qparams['resolution'] = 'FIXED'
            modified.append(('status', status))
            modified.append(('resolution', qparams['resolution']))
        elif status == 'ASSIGNED' and status != qparams['bug_status']:
            qparams['knob'] = 'accept'
            modified.append(('status', status))
        elif status == 'REOPENED' and status != qparams['bug_status']:
            qparams['knob'] = 'reopen'
            modified.append(('status', status))
        elif status == 'VERIFIED' and status != qparams['bug_status']:
            qparams['knob'] = 'verified'
            modified.append(('status', status))
        elif status == 'CLOSED' and status != qparams['bug_status']:
            qparams['knob'] = 'closed'
            modified.append(('status', status))
        elif duplicate:
            qparams['knob'] = 'duplicate'
            qparams['dup_id'] = duplicate
            modified.append(('status', 'RESOLVED'))
            modified.append(('resolution', 'DUPLICATE'))
        elif assigned_to:
            qparams['knob'] = 'reassign'
            qparams['assigned_to'] = assigned_to
            modified.append(('assigned_to', assigned_to))

        # set up modification of the other bits
        if comment:
            qparams['comment'] = comment
            modified.append(('comment', ellipsis(comment, 60)))
        if title:
            qparams['short_desc'] = title or ''
            modified.append(('title', title))
        if url is not None:
            qparams['bug_file_loc'] = url
            modified.append(('url', url))
        if severity is not None:
            qparams['bug_severity'] = severity
            modified.append(('severity', severity))
        if priority is not None:
            qparams['priority'] = priority
            modified.append(('priority', priority))

        # cc manipulation
        if add_cc is not None:
            qparams['newcc'] = ', '.join(add_cc)
            modified.append(('newcc', qparams['newcc']))
        if remove_cc is not None:
            qparams['cc'] = remove_cc
            qparams['removecc'] = 'on'
            modified.append(('cc', remove_cc))

        # bug depend/blocked manipulation
        changed_dependson = False
        changed_blocked = False
        if remove_dependson:
            for bug_id in remove_dependson:
                qparams['dependson'].remove(str(bug_id))
            changed_dependson = True
        if remove_blocked:
            for bug_id in remove_blocked:
                qparams['blocked'].remove(str(bug_id))
            changed_blocked = True
        if add_dependson:
            for bug_id in add_dependson:
                qparams['dependson'].append(str(bug_id))
            changed_dependson = True
        if add_blocked:
            for bug_id in add_blocked:
                qparams['blocked'].append(str(bug_id))
            changed_blocked = True
        qparams['dependson'] = ','.join(qparams['dependson'])
        qparams['blocked'] = ','.join(qparams['blocked'])
        if changed_dependson:
            modified.append(('dependson', qparams['dependson']))
        if changed_blocked:
            modified.append(('blocked', qparams['blocked']))

        if whiteboard is not None:
            qparams['status_whiteboard'] = whiteboard
            modified.append(('status_whiteboard', whiteboard))
        if keywords is not None:
            qparams['keywords'] = keywords
            modified.append(('keywords', keywords))

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['modify'])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        try:
            self.opener.open(req)
            return modified
        except Exception:
            return []

    def attachment(self, attachid):
        """Get an attachment by attachment_id.

        @param attachid: attachment id
        @type attachid: int
        @return: dict with three keys, 'filename', 'size', 'fd'
        @rtype: dict
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['attach'].copy()
        qparams['id'] = attachid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['attach'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        try:
            content_type = resp.info()['Content-type']
            namefield = content_type.split(';')[1]
            filename = re.search(r'name=\"(.*)\"', namefield).group(1)
            content_length = int(resp.info()['Content-length'], 0)
            return {'filename': filename, 'size': content_length, 'fd': resp}
        except Exception:
            return {}

    def post(self, product, component, title, description, url='',
             assigned_to='', cc='', keywords='', version='', dependson='',
             blocked='', priority='', severity=''):
        """Post a bug.

        @param product: product where the bug should be placed
        @type product: string
        @param component: component where the bug should be placed
        @type component: string
        @param title: title of the bug.
        @type title: string
        @param description: description of the bug
        @type description: string
        @keyword url: optional url to submit with bug
        @type url: string
        @keyword assigned_to: optional email to assign bug to
        @type assigned_to: string
        @keyword cc: optional list of CC'd emails
        @type cc: string
        @keyword keywords: optional list of bugzilla keywords
        @type keywords: string
        @keyword version: version of the component
        @type version: string
        @keyword dependson: bugs this one depends on
        @type dependson: string
        @keyword blocked: bugs this one blocks
        @type blocked: string
        @keyword priority: priority of this bug
        @type priority: string
        @keyword severity: severity of this bug
        @type severity: string
        @rtype: int
        @return: the bug number, or 0 if submission failed.
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['post'].copy()
        qparams['product'] = product
        qparams['component'] = component
        qparams['short_desc'] = title
        qparams['comment'] = description
        qparams['assigned_to'] = assigned_to
        qparams['cc'] = cc
        qparams['bug_file_loc'] = url
        qparams['dependson'] = dependson
        qparams['blocked'] = blocked
        qparams['keywords'] = keywords

        # XXX: default version is 'unspecified'
        if version != '':
            qparams['version'] = version
        # XXX: default priority is 'P2'
        if priority != '':
            qparams['priority'] = priority
        # XXX: default severity is 'normal'
        if severity != '':
            qparams['bug_severity'] = severity

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['post'])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        try:
            re_bug = re.compile(r'<title>.*Bug ([0-9]+) Submitted</title>')
            bug_match = re_bug.search(resp.read())
            if bug_match:
                return int(bug_match.group(1))
        except Exception:
            pass
        return 0

    def attach(self, bugid, title, description, filename,
               content_type='text/plain'):
        """Attach a file to a bug.

        @param bugid: bug id
        @type bugid: int
        @param title: short description of the attachment
        @type title: string
        @param description: long description of the attachment
        @type description: string
        @param filename: filename of the attachment
        @type filename: string
        @keyword content_type: mime-type of the attachment
        @type content_type: string
        @rtype: bool
        @return: True if successful, False if not successful.
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['attach_post'].copy()
        qparams['bugid'] = bugid
        qparams['description'] = title
        qparams['comment'] = description
        qparams['contenttypeentry'] = content_type

        filedata = [('data', filename, open(filename).read())]
        content_type, body = encode_multipart_formdata(qparams.items(),
                                                       filedata)

        req_headers = config.headers.copy()
        req_headers['Content-type'] = content_type
        req_headers['Content-length'] = str(len(body))
        req_url = urljoin(self.base, config.urls['attach_post'])
        req = Request(req_url, body, req_headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        # TODO: return attachment id and success?
        try:
            re_success = re.compile(r'<title>Changes Submitted</title>')
            if re_success.search(resp.read()):
                return True
        except Exception:
            pass
        return False
class Connection:
    ENCODING = 'gb18030'
    USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) '
                  'Gecko/20100101 Firefox/9.0.1')
    BBS_URL = 'http://bbs.nju.edu.cn/'
    DATE_FORMAT = '%b %d %H:%M'
    LINE_WIDTH = 40

    base_url = 'http://bbs.nju.edu.cn/'

    def __init__(self, session=None):
        self._cj = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cj))
        # both headers must go in one assignment; assigning addheaders
        # twice would overwrite the User-Agent with the Referer
        self._opener.addheaders = [('User-Agent', self.USER_AGENT),
                                   ('Referer', self.BBS_URL)]
        if session:
            self.load_session(session)

    def _do_action(self, action, params=None, data=None):
        args = []
        if params:
            for k, v in params.items():
                if isinstance(v, list):
                    args += ['{0}={1}'.format(k, i) for i in v]
                else:
                    args.append('{0}={1}'.format(k, v))
        url = self.base_url + action + ('?' if args else '') + '&'.join(args)
        logger.debug(url)
        body = []
        if data:
            for k, v in data.items():
                body.append('{0}={1}'.format(
                    quote(k), quote(unicode(v).encode(self.ENCODING))))
        try:
            response = self._opener.open(url,
                                         '&'.join(body) if data else None)
        except URLError:
            raise NetworkError()
        # decode() in py2.6 does not support the `errors` kwarg.
        html = response.read().decode(self.ENCODING, 'ignore')
        # TODO: BeautifulSoup still needs this?
        html = html.replace(u'<nobr>', u'')  # damn it
        return html

    def load_session(self, session):
        from utils import make_cookie
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)
        self._cj.set_cookie(make_cookie('_U_KEY', session.key))
        self._cj.set_cookie(make_cookie('_U_UID', session.uid))
        self._cj.set_cookie(make_cookie('_U_NUM', session.num))

    def is_logged_in(self):
        html = self._do_action('bbsfoot')
        return html.find('bbsqry?userid=guest') == -1

    def login(self, username, password):
        '''Returns a Session if successful, else None.'''
        from random import randint
        session = Session()
        session.vd = str(randint(10000, 100000))
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)
        params = {'type': 2}
        data = {u'id': username, u'pw': password}
        html = self._do_action('bbslogin', params, data)
        try:
            s = re.search(r"setCookie\('(.*)'\)", html).group(1)
        except AttributeError:
            return None
        s = s.split('+')
        session.key = str(int(s[-1]) - 2)
        s = s[0].split('N')
        session.uid = s[-1]
        session.num = str(int(s[0]) + 2)
        self.load_session(session)
        return session

    def logout(self, session=None):
        if session:
            self.load_session(session)
        # _do_action() encodes the data itself, so pass the unicode
        # literal (u'注销登录' means "log out")
        data = {'Submit': u'注销登录'}
        self._do_action('bbslogout', '', data)
        self._cj.clear()
        self.base_url = self.BBS_URL

    def compose(self, board, title, body, pid=None, gid=None, signature=0):
        '''XXX: unicode'''
        params = {'board': board}
        # hard-wrap each line of the body to LINE_WIDTH columns
        lines = body.split(u'\r\n')
        body = []
        for i in lines:
            body.append(u'\r\n'.join(wrap(i, self.LINE_WIDTH)))
        body = u'\r\n'.join(body)
        data = {'title': title, 'text': body}
        if pid is not None:
            data['reid'] = pid
            data['pid'] = gid
        data['signature'] = signature
        html = self._do_action('bbssnd', params, data)
        return 'Refresh' in html

    def fetch_post(self, board, pid, num):
        params = {'board': board, 'file': pid2str(pid), 'num': num}
        html = self._do_action('bbscon', params)
        soup = BeautifulSoup(html)
        txt = soup.find('textarea').text
        ret = Post(board, pid, num)
        ret.parse_post(txt)
        # TODO: works for 'x' posts
        s = soup.findAll('a')[-1]['href']
        gid = parse_qs(urlparse(s).query).get('gid', None)
        if gid is not None:
            ret.gid = gid[0]
        else:
            ret.gid = None
        return ret

    def fetch_topic(self, board, pid, start=None):
        params = {'board': board, 'file': pid2str(pid)}
        if start:
            params['start'] = start
        html = self._do_action('bbstcon', params)
        soup = BeautifulSoup(html)
        ret = Topic(board, pid)
        items = soup.findAll('table', {'class': 'main'})
        if not items:
            raise ContentError()
        for i in items:
            c = i.tr.td.a['href']
            p = Post(board, parse_pid(c), parse_num(c))
            c = i.findAll('tr')[1].td.textarea.text
            p.parse_post(c)
            ret.post_list.append(p)
        for i in soup.body.center.findAll('a', recursive=False, limit=3):
            if i.text == u'本主题下30篇':  # "next 30 posts in this topic"
                ret.next_start = int(parse_href(i['href'], 'start'))
        return ret

    def fetch_page(self, board, start=None):
        params = {'board': board}
        if start:
            params['start'] = start
        html = self._do_action('bbstdoc', params)
        soup = BeautifulSoup(html)
        items = soup.findAll('tr')[1:]
        year = datetime.now().year
        ret = Page(board)
        for i in items:
            cells = i.findAll('td')
            h = Header()
            h.board = board
            try:
                h.num = int(cells[0].text) - 1
            except ValueError:
                continue
            h.author = cells[2].text.strip()
            h.date = cells[3].text.strip()
            h.date = datetime.strptime(h.date, self.DATE_FORMAT)
            h.date = h.date.replace(year=year)
            h.title = cells[4].text.strip()[2:]
            h.pid = parse_pid(cells[4].a['href'])
            tmp = cells[5].text.strip()
            if tmp.find('/') != -1:
                tmp = tmp.split('/')
                h.reply_count = int(tmp[0])
                h.view_count = int(tmp[1])
            else:
                h.view_count = int(tmp)
            ret.header_list.append(h)
        # TODO
        for i in soup.body.center.findAll('a', recursive=False):
            if i.text == u'上一页':  # "previous page"
                ret.prev_start = int(parse_href(i['href'], 'start')) - 1
        return ret

    def fetch_top10(self):
        html = self._do_action('bbstop10')
        soup = BeautifulSoup(html)
        items = soup.findAll('tr')[1:]
        ret = Page(u'全站十大')  # "site-wide top ten"
        for i in items:
            cells = i.findAll('td')
            h = Header()
            h.board = cells[1].text.strip()
            h.title = cells[2].text.strip()
            h.pid = parse_pid(cells[2].a['href'])
            h.author = cells[3].text.strip()
            h.reply_count = int(cells[4].text.strip())
            ret.header_list.append(h)
        return ret

    def fetch_hot(self):
        html = self._do_action('bbstopall')
        soup = BeautifulSoup(html)
        items = soup.findAll('tr')
        ret = []
        tmp = None
        for i in items:
            if i.img:  # a section banner row starts a new group
                tmp = []
                continue
            cells = i.findAll('td')
            if not cells[0].text:  # an empty row closes the current group
                ret.append(tmp)
                continue
            for j in cells:
                h = Header()
                links = j.findAll('a')
                h.title = links[0].text.strip()
                h.board = links[1].text.strip()
                h.pid = parse_pid(links[0]['href'])
                tmp.append(h)
        return ret

    def fetch_favorites(self):
        html = self._do_action('bbsleft')
        soup = BeautifulSoup(html)
        div = soup.findAll('div', {'id': 'div0'})
        if not div:
            raise Error()
        items = div[0].findAll('a')[:-1]
        ret = [i.text for i in items]
        return ret

    def fetch_board_list(self):
        from time import sleep
        ret = BoardManager()
        for sec in range(12):
            sleep(1)
            html = self._do_action('bbsboa', {'sec': sec})
            soup = BeautifulSoup(html)
            try:
                text = re.search(ur'\[(\w+?)区\]<hr', html,
                                 re.UNICODE).group(1)
            except AttributeError:
                # "Please do not refresh the page too quickly"
                raise ContentError(u'请勿过快刷新页面')
            section = Section(sec, text)
            items = soup.findAll('tr')[1:]
            for i in items:
                cells = i.findAll('td')
                s = cells[5].text[2:]
                # some boards may have a voting in progress
                if s.endswith(u'V'):
                    s = s[:-1]
                board = Board(cells[2].text, s)
                section.board_list.append(board)
            ret.add(section)
        return ret

    def fetch_face_list(self):
        html = self._do_action('editor/face.htm', {'ptext': 'text'})
        soup = BeautifulSoup(html)
        items = soup.findAll('img')
        ret = {}
        for i in items:
            ret[i['title']] = i['src']
        with open('FaceList.json', 'w') as f:
            json.dump(ret, f)
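# A minimal usage sketch for the Connection class above, assuming its
# helper modules (Session, Page, Header, utils, ...) are importable; the
# credentials and the board name are placeholders.
conn = Connection()
session = conn.login('myuser', 'mypassword')
if session is None:
    raise SystemExit('login failed')
page = conn.fetch_page('Linux')  # headers of the board's first page
for header in page.header_list:
    print header.title, header.author
conn.logout()  # clears the cookie jar and resets base_url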