Example 1
def grab_ticket(params, wait, lock, thread_id):
    global stop
    cj = CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    buy_params, headers = init_session(opener, params)
    while not stop and buy_params is None:
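        # drop any half-established session cookies before retrying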
        cj.clear()
        buy_params, headers = init_session(opener, params)
    # wait until 23:30:45 (expressed as seconds since midnight)
    wait_time = (23 * 60 + 30) * 60 + 45
    while not stop and wait:
        now_time = datetime.now()
        current_time = (now_time.hour * 60 +
                        now_time.minute) * 60 + now_time.second
        if current_time >= wait_time:
            cj.clear()
            buy_params, headers = init_session(opener, params)
            break
    # start
    while not stop and not buy_ticket(opener, buy_params, headers, lock,
                                      thread_id):
        diff_days = abs(datetime.now().date() - datetime.strptime(
            buy_params['getin_date'][:10], '%Y/%m/%d').date()).days
        buy_params['getin_date'] = '%s-%d' % (buy_params['getin_date'][:10],
                                              diff_days)
        time.sleep(3)
    stop = True
    thread.exit()
Example 2
def clear(self, domain=None, path=None, name=None):
    if issubclass(CookieJar, object):
        super(KeyringCookieJar, self).clear(domain, path, name)
    else:
        # CookieJar is an old-style class in Python 2, so super() is unusable
        CookieJar.clear(self, domain, path, name)
    self.nuke()
    self.save()
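Note: the dispatch above is needed because in Python 2 cookielib.CookieJar is an old-style class, on which super() cannot be used; the issubclass(CookieJar, object) check selects super() only when CookieJar is a new-style class (as in Python 3's http.cookiejar).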
Example 3
class Yad2Client(object):
    def __init__(self):
        proxy = ProxyHandler(PROXY)
        self.cj = CookieJar()
        opener = build_opener(HTTPCookieProcessor(self.cj), proxy)
        install_opener(opener)

    def add_cookie(self, name, value):
        cookie = Cookie(version=0,
                        name=name,
                        value=value,
                        port=None,
                        port_specified=False,
                        domain='yad2.co.il',
                        domain_specified=False,
                        domain_initial_dot=False,
                        path='/',
                        path_specified=True,
                        secure=False,
                        expires=None,
                        discard=True,
                        comment=None,
                        comment_url=None,
                        rest={'HttpOnly': None},
                        rfc2109=False)

        self.cj.set_cookie(cookie)

    def clear_cookies(self):
        self.cj.clear()

    def get_url(self, url, headers=None, args=None):
        # avoid mutable default arguments: a shared default dict would
        # keep accumulating headers across calls
        headers = headers or {}
        headers["Host"] = "m.yad2.co.il"
        headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
        headers["User-Agent"] = "Mozilla/5.0 (Linux; Android 4.2.2; Android SDK built for x86 Build/KK) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36"
        headers["Accept-Language"] = "en-US"

        args = args or {}
        args["DeviceType"] = "Redmi Note 3"
        args["AppVersion"] = "2.9"
        args["AppType"] = "Android"
        args["OSVersion"] = "5.0.2"
        args["udid"] = "582ffa3d-a4cf-425a-8b36-9874d7464015"

        url = url + "?" + urlencode(args)
        req = Request(url, headers=headers)
        response = urlopen(req)

        return response.read()
Example 4
def get_cookies_from_response(url):
    cookiejar = CookieJar()

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
    opener.open(url)

    # add a new cookie (or replace an old one with the same
    # domain/path/name)
    newcookie = make_cookie('newcookie', '11111', '.baidu.com', '/')
    cookiejar.set_cookie(newcookie)

    # remove that cookie again by domain, path and name
    cookiejar.clear('.baidu.com', '/', 'newcookie')

    return cookiejar
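For reference, here is a minimal self-contained sketch of the three forms CookieJar.clear() accepts, assuming Python 2's cookielib as in the examples above (Python 3 moved the same class to http.cookiejar); the make_cookie() helper below is hypothetical, standing in for the one used in Example 4:

from cookielib import Cookie, CookieJar

def make_cookie(name, value, domain, path):
    # hypothetical helper mirroring Example 4's make_cookie()
    return Cookie(version=0, name=name, value=value,
                  port=None, port_specified=False,
                  domain=domain, domain_specified=True,
                  domain_initial_dot=domain.startswith('.'),
                  path=path, path_specified=True,
                  secure=False, expires=None, discard=True,
                  comment=None, comment_url=None, rest={}, rfc2109=False)

jar = CookieJar()
jar.set_cookie(make_cookie('newcookie', '11111', '.baidu.com', '/'))

jar.clear('.baidu.com', '/', 'newcookie')  # remove one cookie; KeyError if absent
jar.clear('.baidu.com')                    # remove every cookie for the domain
jar.clear()                                # empty the jar entirely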
Example 5
class HLSFetcher(object):

	def __init__(self, url, **options):

		self.program = options.get('program',1)
		self.hls_headers = options.get('headers',{})
		self.path = options.get('path',None)
		self.bitrate = options.get('bitrate',200000)
		self.nbuffer = options.get('buffer',5)
		self.n_segments_keep = options.get('keep',self.nbuffer+1)
		url = urllib.unquote(url)
		self.puser = options.get('puser')
		self.ppass = options.get('ppass')
		self.purl = options.get('purl')

		us = url.split('|')
		if len(us) > 1:
			self.url = us[0]
			for hd in us[1:]:
				self.hls_headers.update(dict(urlparse.parse_qsl(hd.strip())))
		else:
			self.url = url

		self.agent = self.hls_headers.pop('User-Agent', getUserAgent())
		if not self.path:
			self.path = tempfile.mkdtemp()

		self._program_playlist = None
		self._file_playlist = None
		self._cookies = CookieJar()
		self._cached_files = {} 	# sequence n -> path
		self._run = True
		self._poolHelper = TwHTTP11PoolHelper(retryAutomatically=True)

		self._files = None 			# the iter of the playlist files download
		self._next_download = None 	# the delayed download defer, if any
		self._file_playlisted = None # the defer to wait until new files are added to playlist
		self._new_filed = None
		self._seg_task = None

	def _get_page(self, url):
		url = url.encode("utf-8")
		if 'HLS_RESET_COOKIES' in os.environ:
			self._cookies.clear()

		timeout = 10
		return twAgentGetPage(url, agent=self.agent, cookieJar=self._cookies, headers=self.hls_headers, timeout=timeout, pool=self._poolHelper._pool, proxy_url=self.purl, p_user=self.puser, p_pass=self.ppass)

	def _download_page(self, url, path, file):
		def _decrypt(data):
			def num_to_iv(n):
				iv = struct.pack(">8xq", n)
				return b"\x00" * (16 - len(iv)) + iv

			if not self._file_playlist._iv:
				iv = num_to_iv(file['sequence'])
				aes = AES.new(self._file_playlist._key, AES.MODE_CBC, iv)
			else:
				aes = AES.new(self._file_playlist._key, AES.MODE_CBC, self._file_playlist._iv)
			return aes.decrypt(data)

		d = self._get_page(url)
		if self._file_playlist._key:
			d.addCallback(_decrypt)
		return d

	def _download_segment(self, f):
		url = make_url(self._file_playlist.url, f['file'])
		name = 'seg_' + next(tempfile._get_candidate_names())
		path = os.path.join(self.path, name)
		d = self._download_page(url, path, f)
		if self.n_segments_keep != 0:
			file = open(path, 'wb')
			d.addCallback(lambda x: file.write(x))
			d.addBoth(lambda _: file.close())
			d.addCallback(lambda _: path)
			d.addErrback(self._got_file_failed)
			d.addCallback(self._got_file, url, f)
		else:
			d.addCallback(lambda _: (None, path, f))
		return d

	def delete_cache(self, f):
		bgFileEraser = eBackgroundFileEraser.getInstance()
		keys = self._cached_files.keys()
		for i in ifilter(f, keys):
			filename = self._cached_files[i]
			bgFileEraser.erase(str(filename))
			del self._cached_files[i]

	def delete_all_cache(self):
		bgFileEraser = eBackgroundFileEraser.getInstance()
		for path in self._cached_files.itervalues():
			bgFileEraser.erase(str(path))
		self._cached_files.clear()

	def _got_file_failed(self, e):
		if self._new_filed:
			self._new_filed.errback(e)
			self._new_filed = None

	def _got_file(self, path, url, f):
		self._cached_files[f['sequence']] = path
		if self.n_segments_keep != -1:
			self.delete_cache(lambda x: x <= f['sequence'] - self.n_segments_keep)
		if self._new_filed:
			self._new_filed.callback((path, url, f))
			self._new_filed = None
		return (path, url, f)

	def _get_next_file(self):
		next = self._files.next()
		if next:
			return self._download_segment(next)
		elif not self._file_playlist.endlist():
			self._seg_task.stop()
			self._file_playlisted = defer.Deferred()
			self._file_playlisted.addCallback(lambda x: self._get_next_file())
			self._file_playlisted.addCallback(self._next_file_delay)
			self._file_playlisted.addCallback(self._seg_task.start)
			return self._file_playlisted

	def _handle_end(self, failure):
		failure.trap(StopIteration)
		print "End of media"

	def _next_file_delay(self, f):
		if f is None: return 0
		delay = f[2]["duration"]
		if self.nbuffer > 0:
			for i in range(0,self.nbuffer):
				if (f[2]['sequence'] - i) in self._cached_files:
					return delay
			delay = 0
		elif self._file_playlist.endlist():
			delay = 1
		return delay

	def _get_files_loop(self, res=None):
		if not self._seg_task:
			self._seg_task = task.LoopingCall(self._get_next_file)
		d = self._get_next_file()
		if d is not None:
			self._seg_task.stop()
			d.addCallback(self._next_file_delay)
			d.addCallback(self._seg_task.start)
			d.addErrback(self._handle_end)

	def _playlist_updated(self, pl):
		if pl and pl.has_programs():
			# if we got a program playlist, save it and start a program
			self._program_playlist = pl
			(program_url, _) = pl.get_program_playlist(self.program, self.bitrate)
			return self._reload_playlist(M3U8(program_url, self._cookies, self.hls_headers))
		elif pl and pl.has_files():
			# we got sequence playlist, start reloading it regularly, and get files
			self._file_playlist = pl
			if not self._files:
				self._files = pl.iter_files()
			if not pl.endlist():
				reactor.callLater(pl.reload_delay(), self._reload_playlist, pl)
			if self._file_playlisted:
				self._file_playlisted.callback(pl)
				self._file_playlisted = None
		else:
			raise Exception('Playlist has no valid content.')
		return pl

	def _got_playlist_content(self, content, pl):
		if not pl.update(content) and self._run:
			# if the playlist cannot be loaded, start a reload timer
			d = deferLater(reactor, pl.reload_delay(), self._fetch_playlist, pl)
			d.addCallback(self._got_playlist_content, pl)
			return d
		return pl

	def _fetch_playlist(self, pl):
		d = self._get_page(pl.url)
		return d

	def _reload_playlist(self, pl):
		if self._run:
			d = self._fetch_playlist(pl)
			d.addCallback(self._got_playlist_content, pl)
			d.addCallback(self._playlist_updated)
			return d
		else:
			return None

	def get_file(self, sequence):
		d = defer.Deferred()
		keys = self._cached_files.keys()
		try:
			endlist = sequence == self._file_playlist._end_sequence
			sequence = ifilter(lambda x: x >= sequence, keys).next()
			filename = self._cached_files[sequence]
			d.callback((filename, endlist))
		except Exception:
			d.addCallback(lambda x: self.get_file(sequence))
			self._new_filed = d
			keys.sort()
		return d

	def _start_get_files(self, x):
		self._new_filed = defer.Deferred()
		self._get_files_loop()
		return self._new_filed

	def start(self):
		if self._run:
			self._files = None
			d = self._reload_playlist(M3U8(self.url, self._cookies, self.hls_headers))
			d.addCallback(self._start_get_files)
			return d

	def stop(self):
		self._run = False
		self._poolHelper.close()
		if self._seg_task is not None:
			self._seg_task.stop()
		if self._new_filed is not None:
			self._new_filed.cancel()
		reactor.callLater(1, self.delete_all_cache)
Example 6
class Downloader(object):

    def __init__(self, username=None, password=None,
                 debug=False, naptime=True,
                 user_agent=DEFAULT_USER_AGENT):
        self.sleep_after_request = naptime
        self.user_agent = user_agent
        self.debug = debug

        # Try setting the username from args, if missing check the authfile
        self.username = username or auth.lc_username
        self.password = password or auth.lc_password
        self.logged_in = False

        self.cookie_jar = CookieJar()
        if self.debug:
            # Noisy HTTPS handler for debugging
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar),
                HTTPSHandler(debuglevel=1))
        else:
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar))

        self.url_opener.addheaders = [
            ('User-Agent', self.user_agent)
        ]

        logging.info('Downloader initialized.')

    def open_url(self, url, data=None, method='GET'):
        """
        Consistent place to introduce request throttling
        and other HTTP magic
        """

        if method == 'GET':
            if data:
                url += "?" + urlencode(data, True)
            response = self.url_opener.open(url)

        elif method == 'POST':
            response = self.url_opener.open(url, data=urlencode(data))

        else:
            raise ValueError("%s is not a valid HTTP method" % method)

        if self.sleep_after_request:
            sleep_time = random.randint(MIN_SLEEP, MAX_SLEEP)
            logging.debug('Taking a nap for %s seconds', sleep_time)
            time.sleep(sleep_time)

        return response

    def verify_login(self, resp=None):
        """
        Tries to fetch the Account Summary page,
        returns true if it succeeds

        Args:
            resp (HTTPResponse, optional) -
                 reuse resp instead of re-querying

        Returns: True if we're actually logged in;
                 also updates self.logged_in
        """

        if not resp:
            resp = self.open_url(ACCOUNT_SUMMARY_URL)

        resp_text = resp.read()

        # Look for a known tag that appears only for logged in users
        if resp_text.find(LOGGED_IN_VALIDATION) >= 0:
            self.logged_in = True
        else:
            self.logged_in = False

        return self.logged_in

    def logout(self):
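        # CookieJar.clear() raises KeyError when no cookies match the
        # given domain, hence the try/except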
        try:
            self.cookie_jar.clear('.lendingclub.com')
        except KeyError:
            pass

        logging.debug('Cleared cookies')

    def login(self, invalidate_session=False, retries=5):
        """ Sets an actively logged in session with Lending Club.

        Login Steps:
            1. Get a set of session cookies by visiting ACCOUNT_SUMMARY_URL
            2. Authenticate the session cookies with a username / password

        If self.logged_in is already set, this will verify that we're logged in

        Args:
            invalidate_session (bool): will clear cookies and
                log in with a new session.
            retries (int): number of times to retry on unsuccessful login

        Returns: True if login was successful, also updates self.logged_in

        """

        if self.logged_in and not invalidate_session and self.verify_login():
            # Ensure we're logged in and aren't trying to reset our session
            logging.debug('Ensuring that we already have an active session')
            return self.logged_in

        if not self.logged_in or invalidate_session:
            # Start a new session: clear cookies and send a fresh request
            self.logout()

            logging.debug('Starting a fresh Lending Club session..')
            self.open_url(ACCOUNT_SUMMARY_URL)
            logging.debug('session cookies: %s', self.cookie_jar)

        attempt = 1
        while attempt <= retries:

            # Username and password only need to be entered once
            if not self.username:
                self.username = raw_input('Lending Club username:\n')

            if not self.password:
                self.password = getpass('Password:\n')

            data = {
                'login_url': ACCOUNT_SUMMARY_URL,
                'login_email': self.username,
                'login_password': self.password,
                'login_remember_me': 'off',
            }

            response = self.open_url(LOGIN_URL, data, 'POST')

            # Validate the login attempt
            if self.verify_login(response):
                self.logged_in = True

                # We don't need the LC_FIRSTNAME cookie that was just set
                self.cookie_jar.clear('.lendingclub.com', '/', 'LC_FIRSTNAME')

                logging.info('Successfully logged in as %s', self.username)
                break

            else:
                self.username = None
                self.password = None
                self.logged_in = False
                if attempt < retries:
                    logging.warning(
                        'Login attempt %s of %s failed. Will try again.',
                        attempt, retries)
                else:
                    logging.critical('Last login attempt %s failed.', attempt)

                attempt += 1

        return self.logged_in

    def set_query_params(self):
        """
        Before making requests to NOTES_URL, we need to set the high-level
        search params, like the interest rates and loan status.

        Query params are associated with the session on the server-side.
        """

        request_params = {
            'mode': 'search',
            'search_from_rate': '0.04',
            'search_to_rate': '0.26',
            'search_status': ['status_always_current',
                              'status_current',
                              'status_late_16_30',
                              'status_late_31_120'],
            'search_remaining_payments': '60',
            'x': '23',
            'y': '10',
        }

        logging.debug('Setting up the query params..')

        self.open_url(QUERY_PARAMS_URL, request_params)

    def get_page_of_notes(self, sort='ytm', sort_dir='desc',
                          offset=0, limit=10, retries=5):
        """ Given a session cookie, get a page of results in a JSON format """

        request_params = {
            'sortBy': sort,
            'dir': sort_dir,
            'newrdnnum': random.randint(10000000, 90000000),  # What is this?
            'startindex': offset,
            'pagesize': limit,
        }

        QUERY_STATUS_KEY = 'result'

        attempt = 1
        while attempt <= retries:
            try:
                response = self.open_url(NOTES_URL, request_params)
                response_data = response.readline()
                json_data = json.loads(response_data)
                query_status = json_data.get(QUERY_STATUS_KEY)
                if query_status == 'success':
                    return json_data
            except Exception as e:
                log_line = '[%d/%d]: Error parsing response: %s\n RESP: %s' % (
                    attempt, retries, e, response_data)
                logging.warning(log_line)
            else:
                log_line = '[%d/%d] Failed to fetch data. \n RESP: %s' % (
                    attempt, retries, json_data)
                logging.warning(log_line)

            attempt += 1

        # Escalate to CRITICAL if we fail fetching after many retries
        logging.critical('Error fetching page of notes after %d tries.\n > %s',
                         retries, log_line)

        return {}

    def download_data(self, max_records=1000, pagesize=1000,
                      ignore_neg_ytm=False):
        """ Paginate through enough pages of results to get the desired
        number of records. Optionally ignore negative YTM to reduce
        the result set.
        """

        RECORD_COUNT_KEY = 'totalRecords'
        RESULT_SET_KEY = 'searchresult'
        LOANS_KEY = 'loans'

        # ensure we're logged in
        self.login()

        # Set the high-level search query params
        self.set_query_params()

        # How many results match the query?
        logging.info('Fetching the total matching record count for the query')
        total_record_count = int(
            self.get_page_of_notes(limit=1).get(RECORD_COUNT_KEY, 0))

        # How many results do we plan to fetch?
        record_limit = min(max_records, total_record_count)
        logging.info('Fetching up to %s of %s matching records',
                     record_limit, total_record_count)

        all_records = {}
        offset = 0

        while offset < record_limit:

            logging.debug('Fetched %s; getting %s more records from the site',
                          len(all_records), pagesize)

            # Set the query arguments and fetch the data in a nice dict
            query_args = {'offset': offset, 'limit': pagesize, }

            if ignore_neg_ytm:
                # Start with positive YTM and descend,
                # this allows ignoring negatives
                query_args['sort'] = 'ytm'
                query_args['sort_dir'] = 'desc'

            fetched_data = self.get_page_of_notes(**query_args)

            # Break out early if we're not getting sensible results
            if not fetched_data:
                break

            # Get a list of records from the result
            fetched_records = fetched_data.get(
                RESULT_SET_KEY, {}).get(LOANS_KEY, [])

            for record in fetched_records:

                # Break out if we've fetched all of the positive YTM records
                if ignore_neg_ytm and (
                        (record.get('ytm') == 'null') or
                        (float(record.get('ytm', 0)) < 0)):

                    logging.info('Fetched all %s records with a positive YTM',
                                 len(all_records))
                    return all_records

                record_id = record.get('noteId')
                if record_id in all_records:
                    raise KeyError("Looks like we got a duplicate record: %s" %
                                   record)
                all_records[record_id] = record

            offset += pagesize

        return all_records

    def download_historical_loan_data(self):
        logging.info('Downloading file from %s..', LOAN_DATA_CSV_URL)
        urlretrieve(LOAN_DATA_CSV_URL, LOAN_DATA_CSV_TMPFILE)
        logging.info('Done writing to %s', LOAN_DATA_CSV_TMPFILE)
        return parse_loan_data_from_file(LOAN_DATA_CSV_TMPFILE)
Example 7
class Network(DOMMixin):

    capabilities = [
        'cookies',
        'headers',
        ]

    wait_expression = WaitExpression

    user_agent = {
        'browser': 'network',
        'platform': 'python',
        'version': '1.0',
        }

    def __init__(self, base_url=None):
        # accept additional request headers?  (e.g. user agent)
        self._base_url = base_url
        self.reset()

    def open(self, url, wait_for=None, timeout=0):
        """Open web page at *url*."""
        self._open(url)

    def reset(self):
        self._referrer = None
        self._request_environ = None
        self._cookie_jar = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cookie_jar)
        )
        self.status_code = 0
        self.status = ''
        self.response = None
        self.location = None
        self.headers = ()

    def wait_for(self, condition, timeout=None):
        pass

    def sync_document(self):
        """The document is always synced."""

    _sync_document = DOMMixin.sync_document

    @property
    def cookies(self):
        if not (self._cookie_jar and self.location):
            return {}
        request = urllib2.Request(self.location)
        policy = self._cookie_jar._policy

        # return ok will only return a cookie if the following attrs are set
        # correctly => # "version", "verifiability", "secure", "expires",
        # "port", "domain"
        return dict((c.name, c.value.strip('"'))
            for c in self._cookie_jar if policy.return_ok(c, request))

    def set_cookie(self, name, value, domain=None, path=None,
                   session=True, expires=None, port=None):
#        Cookie(version, name, value, port, port_specified,
#                 domain, domain_specified, domain_initial_dot,
#                 path, path_specified, secure, expires,
#                 discard, comment, comment_url, rest,
#                 rfc2109=False):

        cookie = Cookie(0, name, value, port, bool(port),
                        domain or '', bool(domain),
                        (domain and domain.startswith('.')),
                        path or '', bool(path), False, expires,
                        session, None, None, {}, False)
        self._cookie_jar.set_cookie(cookie)

    def delete_cookie(self, name, domain=None, path=None):
        try:
            self._cookie_jar.clear(domain, path, name)
        except KeyError:
            pass

    # Internal methods
    @lazy_property
    def _lxml_parser(self):
        return html_parser_for(self, wsgi_elements)

    def _open(self, url, method='GET', data=None, refer=True,
              content_type=None):
        before_browser_activity.send(self)
        open_started = time()

        if data:
            data = urlencode(data)

        url = urljoin(self._base_url, url)
        if method == 'GET':
            if '?' in url:
                url, query_string = url.split('?', 1)
            else:
                query_string = None

            if data:
                query_string = data
            if query_string:
                url = url + '?' + query_string

            request = urllib2.Request(url)
        elif method == 'POST':
            request = urllib2.Request(url, data)
        else:
            raise Exception('Unsupported method: %s' % method)
        if self._referrer and refer:
            request.add_header('Referer', self._referrer)

        logger.info('%s(%s)', url, method)
        request_started = time()

        response = self._opener.open(request)

        request_ended = time()

        self.status_code = response.getcode()
        self.headers = Headers(
            (head.strip().split(': ',1) for head in response.info().headers)
        )
        self._referrer = request.get_full_url()
        self.location = response.geturl()
        self._response = response
        self.response = ''.join(list(response))
        self._sync_document()

        open_ended = time()
        request_time = request_ended - request_started

        logger.info("Fetched %s in %0.3fsec + %0.3fsec browser overhead",
                    url, request_time,
                    open_ended - open_started - request_time)
        after_browser_activity.send(self)
Example 8
class GHttp(object):
    def __init__(self):
        """
        class initialisation, creates cookie jar and headers
        """

        self.lastpage = None
        self.lasterror = None

        self.cj = CookieJar()

        self.cookieH = urllib2.HTTPCookieProcessor(self.cj)
        self.redirectH = urllib2.HTTPRedirectHandler()
        self.proxyH = None

        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)

    def addproxy(self, proxyipport):
        self.proxyH = urllib2.ProxyHandler({'http': proxyipport})
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH,
                                           self.proxyH)
        if self.rq('http://google.com') is None:
            return False
        return True

    def removeproxy(self):
        """
        Removes the currently set proxy
        """
        self.proxyH = None
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)

    def clearcookies(self):
        """
        clears all cookies from the cookie jar :)
        """
        self.cj.clear()

    def rq(self, url, ref=None, data=None):
        """
        Http request, it either returns response html or
        none if there's an error.

        Keyword arguments:
        url -- the url you want to request
        data -- data for the POST method, the payload you will be sending
        ref -- the referer for your request; if none is specified, the
               last page's url (or the current url) is used (default None)
        """
        # reset lasterror
        self.lasterror = None

        # set the referrer
        if ref is None:
            if self.lastpage is None:
                self.lastpage = url
            ref = self.lastpage

        self.opener.addheaders = [
            ('Referer', ref),
            ('User-Agent',
             'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.98 Safari/534.13'
             )
        ]

        # if data is a dictionary, use urllib to encode it into
        # url-encoded form
        if isinstance(data, dict):
            data = urllib.urlencode(data)

        # catch exceptions so program does not crash
        try:
            if data is not None:
                opnr = self.opener.open(url, data=data)
            else:
                opnr = self.opener.open(url)
        except urllib2.HTTPError, e:
            self.lasterror = 'The server couldn\'t fulfill the request.' + \
                             'Error code: %s' % e.code
            return None
        except urllib2.URLError, e:
            self.lasterror = 'We failed to reach a server. Reason: %s' % e.reason
            return None

        # success: remember this page as the next request's referrer and
        # return the response html (as the docstring promises)
        self.lastpage = url
        return opnr.read()
Example 9
class TestClassBase(unittest.TestCase):
    def setUp(self):

        self.maxDiff = None
        self.execOnTearDown = []

        self.cookies = CookieJar()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookies))
        self._assert_unlogged()
        self._login(admin_data[0], 'administrator')
        self._assert_logged(admin_data[0])
        self.admin_data = admin_data

    def _login(self, data, group):
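        # start from an empty jar so a previous user's session cookies
        # can't leak into this login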
        self.cookies.clear()
        encoded_credentials = urllib.urlencode({
            'username': data['username'],
            'password': data['password']
        })
        self.opener.open(url + "/auth/login", encoded_credentials)
        self.group = group

    def _assert_logged(self, data, notifications=None):

        json_out = self._request('/me', {})

        if notifications is not None:
            self.assertEqual(
                {
                    'username': json_out['username'],
                    'group': json_out['group'],
                    'notifications': json_out['notifications']
                }, {
                    'username': data['username'],
                    'group': self.group,
                    'notifications': notifications
                })
        else:
            self.assertEqual(
                {
                    'username': json_out['username'],
                    'group': json_out['group']
                }, {
                    'username': data['username'],
                    'group': self.group
                })

    def _assert_unlogged(self):
        self.assertEqual(
            urllib2.urlopen(url + '/me').geturl(), url + '/auth/login')

    def _request(self, uri, json_in):
        request = urllib2.Request('https://localhost:9090/%s' %
                                  uri.lstrip('/'),
                                  data=json.dumps(json_in),
                                  headers={'Content-Type': 'application/json'})
        return json.loads(self.opener.open(request).read())

    def _assert_req(self, uri, json_in, json_expected):
        json_out = self._request(uri, json_in)
        #         import pprint
        #         print 'RETURNED:', pprint.pprint(clean_id(copy.deepcopy(json_out)))
        #         print 'EXPECTED:', pprint.pprint(json_expected)
        #         print 'EQ:', clean_id(copy.deepcopy(json_out)) == json_expected
        self.assertEqual(clean_id(copy.deepcopy(json_out)), json_expected)
        return json_out

    def tearDown(self):

        self._login(self.admin_data[0], 'administrator')
        self._assert_logged(self.admin_data[0])

        for command in self.execOnTearDown:
            uri, json_in, json_expected = command

            self.assertEqual(clean_id(self._request(uri, json_in)),
                             json_expected)

    def _plain_request(self, uri=''):
        return self.opener.open('https://localhost:9090/' + uri).read()
Example 10
class Downloader(object):

    def __init__(self, username=None, password=None,
                 debug=False, naptime=True,
                 user_agent=DEFAULT_USER_AGENT):
        self.sleep_after_request = naptime
        self.user_agent = user_agent
        self.debug = debug

        # Try setting the username from args
        self.username = username
        self.password = password
        self.logged_in = False

        self.cookie_jar = CookieJar()
        if self.debug:
            # Noisy HTTPS handler for debugging
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar),
                HTTPSHandler(debuglevel=1))
        else:
            self.url_opener = build_opener(
                HTTPCookieProcessor(self.cookie_jar))

        self.url_opener.addheaders = [
            ('User-Agent', self.user_agent)
        ]

        logging.info('Downloader initialized.')

    def open_url(self, url, data=None, method='GET', verify=False):
        """
        Consistent place to introduce request throttling
        and other HTTP magic
        """

        if method not in ('GET', 'POST'):
            raise ValueError("%s is not a valid HTTP method" % method)

        attempt = 1
        while attempt <= TIMEOUT_RETRY:
            try:
                if method == 'GET':
                    dataurl = url
                    if data:
                        dataurl += "?" + urlencode(data, True)
                    response = self.url_opener.open(dataurl, timeout=TIMEOUT)

                elif method == 'POST':
                    response = self.url_opener.open(url, data=urlencode(data), timeout=TIMEOUT)

                if self.sleep_after_request:
                    sleep_time = random.randint(MIN_SLEEP, MAX_SLEEP)
                    #logging.debug('Taking a nap for %s seconds', sleep_time)
                    time.sleep(sleep_time)

                return response

            except socket.timeout as e:
                logging.warning("Timeout error: %s, [%d/%d] while fetching url %s with data %s", 
                                type(e), attempt, TIMEOUT_RETRY, url, data)
            
            except Exception as e:
                logging.warning("Error caught: %s, [%d/%d] while fetching url %s with data %s", 
                                type(e), attempt, TIMEOUT_RETRY, url, data)

            else:
                logging.warning("Failed to fetch url %s with data %s, [%d/%d]",
                                url, data, attempt, TIMEOUT_RETRY)
              
            attempt += 1

            if attempt <= TIMEOUT_RETRY:
                time.sleep(TIMEOUT_SLEEP)

            if verify and not self.verify_login():
                self.login()
        
        # end attempt loop
        
        logging.critical('Error fetching url %s with data %s after %d tries.', url, str(data), TIMEOUT_RETRY)

        return {}

    def verify_login(self, resp=None):
        """
        Tries to fetch the Account Summary page,
        returns true if it succeeds

        Args:
            resp (HTTPResponse, optional) -
                 reuse resp instead of re-querying

        Returns: True if we're actually logged in;
                 also updates self.logged_in
        """
        response_got = False

        while not response_got:

            if not resp:
                resp = self.open_url(ACCOUNT_SUMMARY_URL)

            try:
                resp_text = resp.read()
                response_got = True

            except Exception:
                logging.warning("verify_login: cannot read verify response")
                resp = None  # force a re-fetch on the next iteration
                
        # end response_got

        # Look for a known tag that appears only for logged in users
        if resp_text.find(LOGGED_IN_VALIDATION) >= 0:
            self.logged_in = True
        else:
            self.logged_in = False

        return self.logged_in

    def logout(self):
        try:
            self.cookie_jar.clear('.lendingclub.com')
        except KeyError:
            pass

        logging.debug('Cleared cookies')

    def login(self, invalidate_session=False, retries=5):
        """ Sets an actively logged in session with Lending Club.

        Login Steps:
            1. Get a set of session cookies by visiting ACCOUNT_SUMMARY_URL
            2. Authenticate the session cookies with a username / password

        If self.logged_in is already set, this will verify that we're logged in

        Args:
            invalidate_session (bool): will clear cookies and
                log in with a new session.
            retries (int): number of times to retry on unsuccessful login

        Returns: True if login was successful, also updates self.logged_in

        """

        if self.logged_in and not invalidate_session and self.verify_login():
            # Ensure we're logged in and aren't trying to reset our session
            logging.debug('Ensuring that we already have an active session')
            return self.logged_in

        if not self.logged_in or invalidate_session:
            # Start a new session: clear cookies and send a fresh request
            self.logout()

            logging.debug('Starting a fresh Lending Club session..')
            self.open_url(ACCOUNT_SUMMARY_URL)
            logging.debug('session cookies: %s', self.cookie_jar)

        attempt = 1
        while attempt <= retries:

            # Username and password only need to be entered once
            if not self.username:
                self.username = raw_input('Lending Club username:\n')

            if not self.password:
                self.password = getpass('Password:\n')

            data = {
                'login_url': ACCOUNT_SUMMARY_URL,
                'login_email': self.username,
                'login_password': self.password,
                'login_remember_me': 'off',
            }

            response = self.open_url(LOGIN_URL, data, 'POST')

            # Validate the login attempt
            if self.verify_login(response):
                self.logged_in = True

                # We don't need the LC_FIRSTNAME cookie that was just set
                self.cookie_jar.clear('.lendingclub.com', '/', 'LC_FIRSTNAME')

                logging.info('Successfully logged in as %s', self.username)
                break

            else:
                self.username = None
                self.password = None
                self.logged_in = False
                if attempt < retries:
                    logging.warning(
                        'Login attempt %s of %s failed. Will try again.',
                        attempt, retries)
                else:
                    logging.critical('Last login attempt %s failed.', attempt)

                attempt += 1

        return self.logged_in

    def set_query_params(self):
        """
        Before making requests to NOTES_URL, we need to set the high-level
        search params, like the interest rates and loan status.

        Query params are associated with the session on the server-side.
        """

        request_params = {
            'mode': 'search',
            'search_from_rate': '0.04',
            'search_to_rate': '0.29',
            'fil_search_term':['term_36',
                               'term_60'],
            'search_loan_term':['term_36',
                                'term_60'],
            'opr_min':0.00,
            'opr_max':'Any',
            'loan_status':['loan_status_issued',
                           'loan_status_late_16_30',
                           'loan_status_current',
                           'loan_status_late_31_120',
                           'loan_status_ingrace'],
            'remp_min':1,
            'remp_max':60,
            'askp_min':0.00,
            'askp_max':'Any',
            'credit_score_min':600,
            'credit_score_max':850,
            'ytm_min':0,
            'ytm_max':'Any',
            'credit_score_trend':['UP',
                                  'DOWN',
                                  'FLAT'],
            'markup_dis_min':-100,
            'markup_dis_max':100,
            'ona_min':25,
            'ona_max':'Any'
        }
        logging.debug('Setting up the query params..')

        self.open_url(QUERY_PARAMS_URL, request_params)

    def get_page_of_notes(self, sort='opa', sort_dir='asc',
                          offset=0, limit=15, retries=5):
        """ Given a session cookie, get a page of results in a JSON format """

        request_params = {
            'sortBy': sort,
            'dir': sort_dir,
            'newrdnnum': random.randint(10000000, 90000000),  # What is this?
            'startindex': offset,
            'pagesize': limit,
        }

        QUERY_STATUS_KEY = 'result'

        response = self.open_url(NOTES_URL, request_params, verify=True)
        try:
            response_data = response.readline()
            json_data = json.loads(response_data)
            query_status = json_data.get(QUERY_STATUS_KEY)
            if query_status == 'success':
                return json_data
        except Exception as e:
            log_line = 'Error parsing response: %s\n RESP: %s' % (
                e, response_data)
            logging.warning(log_line)
        else:
            log_line = 'Failed to fetch data. \n RESP: %s' % (
                json_data)
            logging.warning(log_line)


        # Escalate to CRITICAL if we failed to fetch the page
        logging.critical('Error fetching page of notes after %d tries.\n > %s',
                         retries, log_line)

        return {}

    def get_note_details(self, record):
        request_params = {
            'loan_id': record.get('loanGUID'),
            'order_id': record.get('orderId'),
            'note_id': record.get('noteId'),
            'showfoliofn': 'true'
        }
        QUERY_STATUS_KEY = 'result'
        
        response = self.open_url(NOTE_INFO_BASE_URL, request_params, verify=True)

        try:
            response_page = response.read()
            note_parser = NoteHTMLParser(response_page)
            note_info = note_parser.get_info()
            query_status = note_info.get(QUERY_STATUS_KEY)
            if query_status:
                return note_info
        except Exception as e:
            log_line = 'get_note_details: Error parsing response: %s\n RESP: %s' % (
                e, response)
            logging.warning(log_line)
        else:
            log_line = 'get_note_details: Failed to parse the response data for record %s' % (
                record)
            logging.warning(log_line)

        return {}

    def get_loan_details(self, record):
        request_params = {
            'loan_id': record.get('loanGUID')
        }
        QUERY_STATUS_KEY = 'result'
        
        response = self.open_url(LOAN_INFO_BASE_URL, request_params, verify=True)

        try:
            response_page = response.read()
            loan_parser = LoanHTMLParser(response_page)
            loan_info = loan_parser.get_info()
            query_status = loan_info.get(QUERY_STATUS_KEY)
            if query_status:
                return loan_info
        except Exception as e:
            log_line = 'get_loan_details: Error parsing response: %s\n RESP: %s' % (
                e, response)
            logging.warning(log_line)
        else:
            log_line = 'get_loan_details: Failed to fetch data for record %s' % (
                record)
            logging.warning(log_line)

        return {}

    def format_record_detail(self, note_id, note_detail, loan_detail):
        formated = loan_detail
        formated['note_id'] = note_id
        formated.update(note_detail)
        return formated

    def download_note_details(self, mongo_manager, pagesize=250):
        """ download note details from lc using records stored in mongo_manager 
        """
        
        logging.info('Fetching records from mongo_manager')

        all_record_ids = mongo_manager.get_records()

        total_record_count = len(all_record_ids)

    logging.info('Fetched %s records', total_record_count)

        self.login()
        logging.info('Start downloading at %s', datetime.now())
        
        count = 1
        page_record_details = {}
        start_time = time.time()

        for note_id, record_ids in all_record_ids.iteritems():

            logging.debug('Fetching note %s, loan_id %s, order_id %s', 
                          record_ids['noteId'], record_ids['loanGUID'], record_ids['orderId'])

            note_detail = self.get_note_details(record_ids)
            loan_detail = self.get_loan_details(record_ids)
            if not note_detail or not loan_detail:
                logging.warning('Failed to fetch note %s, omitting that', record_ids['noteId'])
                continue

            record_detail = self.format_record_detail(note_id, note_detail, loan_detail)
            page_record_details[note_id] = record_detail
        
            if count % pagesize == 0:
                mongo_manager.add_note_details(page_record_details)
                logging.info('Fetched %s records, %.2f mins elapsed..', count, (time.time() - start_time)/60)
                page_record_details = {}
                time.sleep(1)

            count += 1
        # end loop of record

        logging.info('Fetched %s records; download complete at %s. %.2f min elapsed.', 
                     count, str(datetime.now()), (time.time() - start_time)/60)

    def download_data(self, max_records=250, pagesize=250, mongo_manager=None, download_details=True):
        """ Paginate through enough pages of results to get the desired
        number of records. Optionally ignore negative YTM to reduce
        the result set.
        """

        RECORD_COUNT_KEY = 'totalRecords'
        RESULT_SET_KEY = 'searchresult'
        LOANS_KEY = 'loans'

        # ensure we're logged in
        self.login()

        # Set the high-level search query params
        self.set_query_params()

        # How many results match the query?
        logging.info('Fetching the total matching record count for the query')

        total_record_count = int(
            self.get_page_of_notes(limit=1).get(RECORD_COUNT_KEY, 0))

        # How many results do we plan to fetch?
        record_limit = min(max_records, total_record_count)
        logging.info('Fetching up to %s of %s matching records',
                     record_limit, total_record_count)

        all_records = {}
        records_set = set()

        offset = 0

        logging.info('Start downloading at %s', datetime.now())
        start_time = time.time()

        while offset < record_limit:
            
            logging.debug('Fetched %s; getting %s more records from the site',
                          len(records_set), pagesize)

            # Set the query arguments and fetch the data in a nice dict
            query_args = {'offset': offset, 'limit': pagesize, }

            fetched_data = self.get_page_of_notes(**query_args)

            # Break out early if we're not getting sensible results
            if not fetched_data:
                break

            # Get a list of records from the result
            fetched_records = fetched_data.get(
                RESULT_SET_KEY, {}).get(LOANS_KEY, [])
            
            page_record_details = {}
            page_record_ids = {}

            for record in fetched_records:

                note_id = record.get('noteId')
                if note_id in records_set:
                    logging.warning('Looks like we got a duplicate record: %s', record)

                if mongo_manager:
                    if download_details:
                        note_detail = self.get_note_details(record)
                        loan_detail = self.get_loan_details(record)
                        record_detail = self.format_record_detail(note_id, note_detail, loan_detail)
                        page_record_details[note_id] = record_detail
                    else:
                        record_ids = {}
                        record_ids['loan_id'] = record.get('loanGUID')
                        record_ids['order_id'] = record.get('orderId')
                        record_ids['note_id'] = record.get('noteId')
                        page_record_ids[note_id] = record_ids
                else:
                    all_records[note_id] = record

                records_set.add(note_id)
            # end loop of record

            if mongo_manager:
                if download_details:
                    mongo_manager.add_note_detail(page_record_details)
                else:
                    mongo_manager.add_note_ids(page_record_ids)

            offset += pagesize
        # end loop of pages

        logging.info('Fetched %s records; download complete at %s. %.2f min elapsed.', 
                     len(records_set), str(datetime.now()), (time.time() - start_time)/60)

        return all_records

    def download_historical_loan_data(self):
        logging.info('Downloading file from %s..', LOAN_DATA_CSV_URL)
        urlretrieve(LOAN_DATA_CSV_URL, LOAN_DATA_CSV_TMPFILE)
        logging.info('Done writing to %s', LOAN_DATA_CSV_TMPFILE)
        return parse_loan_data_from_file(LOAN_DATA_CSV_TMPFILE)
Example 11
class HttpPostPublisher(BasePublisher):
    """
    Publish metrics via HTTP POST
    """
    def __init__(self,
                 username,
                 password,
                 url='https://localhost:8443/api/metrics/store',
                 buflen=defaultMetricBufferSize,
                 pubfreq=defaultPublishFrequency):
        super(HttpPostPublisher, self).__init__(buflen, pubfreq)
        self._username = username
        self._password = password
        self._needsAuth = False
        self._authenticated = False
        if self._username:
            self._needsAuth = True
        self._cookieJar = CookieJar()
        self._agent = CookieAgent(Agent(reactor), self._cookieJar)
        self._url = url
        # note: str.rstrip strips a set of characters rather than a
        # suffix, which is close enough here to trim a trailing ".py"
        self._agent_suffix = os.path.basename(
            sys.argv[0].rstrip(".py")) if sys.argv[0] else "python"
        reactor.addSystemEventTrigger('before', 'shutdown', self._shutdown)

    def _metrics_published(self, response, llen, remaining=0):
        if response.code != 200:
            if response.code == UNAUTHORIZED:
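                # a 401 invalidates the session: drop our auth state and
                # cookies so the next request re-sends credentials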
                self._authenticated = False
                self._cookieJar.clear()
            raise IOError("Expected HTTP 200, but received %d from %s" %
                          (response.code, self._url))

        if self._needsAuth:
            self._authenticated = True
        log.debug("published %d metrics and received response: %s", llen,
                  response.code)
        finished = defer.Deferred()
        response.deliverBody(ResponseReceiver(finished))
        if remaining:
            reactor.callLater(0, self._put, False)
        return finished

    def _response_finished(self, result):
        # The most likely result is the HTTP response from a successful POST,
        # which should be JSON formatted.
        if isinstance(result, str):
            log.debug("response was: %s", json.loads(result))
        # We could be called back because _publish_failed was called before us
        elif isinstance(result, int):
            log.info("queue still contains %d metrics", result)
        # Or something strange could have happened
        else:
            log.warn("Unexpected result: %s", result)

    def _shutdown(self):
        log.debug('shutting down [publishing]')
        if len(self._mq):
            self._make_request()

    def _make_request(self):
        metrics = []
        for x in xrange(HTTP_BATCH):
            if not self._mq:
                break
            metrics.append(self._mq.popleft())
        if not metrics:
            return defer.succeed(None)

        serialized_metrics = json.dumps({"metrics": metrics})
        body_writer = StringProducer(serialized_metrics)

        headers = Headers({
            'User-Agent': ['Zenoss Metric Publisher: %s' % self._agent_suffix],
            'Content-Type': ['application/json']
        })

        if self._needsAuth and not self._authenticated:
            log.info("Adding auth for metric http post %s", self._url)
            headers.addRawHeader(
                'Authorization',
                basic_auth_string_content(self._username, self._password))

        d = self._agent.request('POST', self._url, headers, body_writer)

        d.addCallbacks(self._metrics_published,
                       errback=self._publish_failed,
                       callbackArgs=[len(metrics), len(self._mq)],
                       errbackArgs=[metrics])
        d.addCallbacks(self._response_finished,
                       errback=self._publish_failed,
                       errbackArgs=[metrics])

        return d

    def _put(self, scheduled):
        """
        Push the buffer of metrics to the specified Redis channel
        @param scheduled: scheduled invocation?
        """
        if scheduled:
            self._reschedule_pubtask(scheduled)

        if len(self._mq) == 0:
            return defer.succeed(0)

        log.debug('trying to publish %d metrics', len(self._mq))
        return self._make_request()
Example 12
class SafeBoxClient():
    def __init__(self, server_addr="localhost:8000"):
        self.server_addr = server_addr
        self.client_id = self.ccid = self.pin = None
        self.cookie_jar = CookieJar()
        self.curr_ticket = ""

    # startClient: Initializes the client's remaining attributes,
    # this implies starting a session and eventually client registration.
    def startClient(self, ccid, passwd, pin):

        # checking if client is already registered
        def checkClientReg_cb(success):
            if not success:
                print "User not registered."
                if pin is None:
                    print "Please provide your Citizen Card for registration"
                    reactor.stop()
                    return
                else:
                    print "Registering user..."
                    return self.handleRegister()
            #pprint(self.cookie_jar.__dict__)
            print "User: "******" logged in."
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)

        # Instantiating ClientIdentity
        def startClientId_cb(key):
            self.client_id = ClientIdentity(self.ccid, passwd, key)
            self.handleStartSession(checkClientReg_cb)

        self.ccid = ccid
        if pin is not None:
            self.pin = pin
        return self.handleGetKey(startClientId_cb)

    # Session, registry and authentication related operations
    #
    # handleGetKey: handles getkey operations; this happens as the
    # first step of the startClient operation.

    def handleGetKey(self, method):
        def handleGetKey_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "getkey"))
            return NOT_DONE_YET

        agent = Agent(reactor)
        headers = http_headers.Headers()
        d = agent.request('GET',
                          'http://localhost:8000/session/?method=getkey',
                          headers, None)

        d.addCallback(handleGetKey_cb)

        return NOT_DONE_YET
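    # Note: DataPrinter is a Protocol defined elsewhere in this project that
    # collects the response body and fires the Deferred. For simple cases,
    # twisted.web.client.readBody does the same job without a custom
    # consumer, e.g.:
    #     d = Agent(reactor).request('GET', url)
    #     d.addCallback(readBody)   # fires with the body as a string
    #     d.addCallback(method)     # hand the key to the next step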

    # handleStartSession: handles startsession operations
    def handleStartSession(self, method):
        def procResponse_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def startSession_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            body = _FileProducer(
                StringIO(self.client_id.encryptData(self.client_id.password)),
                dataq)
            headers = http_headers.Headers()
            d = agent.request(
                'PUT',
                'http://localhost:8000/session/?method=startsession&ccid=' +
                self.ccid + '&nonceid=' + str(nonceid), headers, body)
            d.addCallback(procResponse_cb)
            return NOT_DONE_YET

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(startSession_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        if self.pin is not None:
            agent = Agent(reactor)
            body = FileBodyProducer(
                StringIO(self.client_id.pub_key.exportKey('PEM')))
            headers = http_headers.Headers()
            d = agent.request(
                'GET', 'http://localhost:8000/session/?method=getnonce',
                headers, body)

            d.addCallback(getNonce_cb)

            return NOT_DONE_YET

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        body = FileBodyProducer(
            StringIO(self.client_id.encryptData(self.client_id.password)))
        headers = http_headers.Headers()
        d = agent.request(
            'PUT', 'http://localhost:8000/session/?method=startsession&ccid=' +
            self.ccid + '&nonceid=' + str(-1), headers, body)
        d.addCallback(procResponse_cb)
        return NOT_DONE_YET

    # handleRegister: Handles the registration process. Also part of the startClient operation.
    def handleRegister(self):
        def checkClientReg_cb(success):
            if success == False:
                print "ERROR: Couldn't register user."
                reactor.stop()
                return

            #pprint(self.cookie_jar.__dict__)
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)
            print "Registration Successful."
            print "User: "******" logged in."

        def procResponse_cb(response, method):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def register_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            dataq.append(self.client_id.encryptData(self.client_id.password))
            # Sending the Certificate and the Sub CA to the server
            if self.pin is None:
                print "ERROR! Check the pin!"
                reactor.stop()
                return
            cert = cc.get_certificate(cc.CERT_LABEL, self.pin)
            #print type(cert.as_pem())
            #print cert.as_pem()
            if cert is None:
                print "ERROR! Check the pin"
                reactor.stop()
                return
            subca = cc.get_certificate(cc.SUBCA_LABEL, self.pin)
            #print type(subca.as_pem())
            #print subca.as_pem()
            if subca is None:
                print "ERROR! Check the pin"
                reactor.stop()
                return

            enc_cert = b64encode(cert.as_pem())
            #print "cert len: ", len(enc_cert)
            dataq.append(enc_cert)
            enc_subca = b64encode(subca.as_pem())
            #print "sub ca len: ", len(enc_subca)
            dataq.append(enc_subca)
            dataq.append(self.client_id.pub_key.exportKey('PEM'))
            ext_key = self.client_id.pub_key.exportKey('PEM')
            if self.pin is None:
                print "ERROR! Check the pin or the CC"
                reactor.stop()
                return
            signed_ext_key = cc.sign(ext_key, cc.KEY_LABEL, self.pin)
            enc_sek = b64encode(signed_ext_key)
            #print "encoded ext key: ", enc_sek
            #print "len encoded: ", len(enc_sek)
            dataq.append(enc_sek)
            body = FileProducer2(dataq)
            headers = http_headers.Headers()
            #print "Password:"******"LEN:", len(self.client_id.encryptData(self.client_id.password))
            d = agent.request(
                'PUT', 'http://localhost:8000/pboxes/?method=register' +
                '&nonceid=' + str(nonceid), headers, body)
            d.addCallback(procResponse_cb, checkClientReg_cb)

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(register_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        agent = Agent(reactor)
        body = FileBodyProducer(
            StringIO(self.client_id.pub_key.exportKey('PEM')))
        headers = http_headers.Headers()
        d = agent.request('GET',
                          'http://localhost:8000/session/?method=getnonce',
                          headers, body)

        d.addCallback(getNonce_cb)
        return NOT_DONE_YET

    def processCookie(self, uri):
        # Treat the ticket as a big-endian integer, increment it, then sign
        # and encrypt the new value before storing it back in the cookie.
        dci = number.long_to_bytes(number.bytes_to_long(self.curr_ticket) + 1)
        #print "incremented ticket", number.bytes_to_long(dci)
        self.curr_ticket = dci
        sci = self.client_id.signData(str(dci))
        enc = self.client_id.encryptData(sci)
        # Snapshot the jar first: clearing it while iterating over it would
        # mutate the jar mid-loop.
        cookies = list(self.cookie_jar)
        self.cookie_jar.clear()
        for cookie in cookies:
            cookie.value = enc
            cookie.path = uri
            self.cookie_jar.set_cookie(cookie)
        return dci
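    # For reference, the PyCrypto helpers used above treat the ticket as a
    # big-endian integer, e.g.:
    #     number.bytes_to_long('\x00\x29')  -> 41L
    #     number.long_to_bytes(41 + 1)      -> '*'   (i.e. chr(42))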

# List Operations
#
# handleList: handles every list command

    def handleList_cb(self, response):
        defer = Deferred()
        response.deliverBody(DataPrinter(defer, "list"))
        return NOT_DONE_YET

    def handleListPboxes(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/pboxes")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))
        #print "hashed:", self.client_id.genHashArgs(args, salt)
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/pboxes/?method=list&ccid=' + self.ccid,
            headers, body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    def handleListFiles(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/files")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/files/?method=list&ccid=' + self.ccid,
            headers, body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    def handleListShares(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/shares/?method=list&ccid=' + self.ccid,
            headers, body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

# Get Operations
#
# handleGetMData: Handles get pbox metadata operations.

    def handleGetMData(self, data):
        #data = (method, tgtccid)
        pprint(data)

        def handleGetMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/pboxes")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/pboxes/?method=get_mdata&ccid=' +
            self.ccid + "&tgtccid=" + data[1], headers, body)

        d.addCallback(handleGetMData_cb)

        return NOT_DONE_YET

    # handleGetFileMData: Handles get file metadata operations.
    def handleGetFileMData(self, data):
        #data = (method, fileid)
        def handleGetFileMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/files")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/files/?method=get_mdata&ccid=' +
            self.ccid + "&fileid=" + data[1], headers, body)

        d.addCallback(handleGetFileMData_cb)

        return NOT_DONE_YET

    # handleGetShareMData: Handles get share metadata operations.
    def handleGetShareMData(self, data):
        #data = (method, fileid)
        def handleGetShareMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/shares/?method=get_mdata&ccid=' +
            self.ccid + "&fileid=" + data[1], headers, body)

        d.addCallback(handleGetShareMData_cb)

        return NOT_DONE_YET

    # handleGet: handles get file
    #def handleGet(self, line):
    def printResult_cb(self, data):
        pprint(data)  #TODO: Format this!
        return NOT_DONE_YET

    # for info requests
    def handleGetInfo(self, s):
        if s[1].lower() == "pboxinfo":
            return self.handleGetMData((self.printResult_cb, s[2].lower()))
        elif s[1].lower() == "fileinfo":
            return self.handleGetFileMData((self.printResult_cb, s[2].lower()))
        elif s[1].lower() == "shareinfo":
            return self.handleGetShareMData(
                (self.printResult_cb, s[2].lower()))

    # Decrypt and write the file
    def writeFile_cb(self, ignore, s):
        # TODO: we should implement HTTP error-code checking here
        fileId = s[2]
        enc_file = open(fileId, "r")
        if len(s) == 4:
            dec_file = open(s[3], "w")
        else:
            dec_file = open(fileId + "_decrypted", "w")

        enc_key = enc_file.read(IV_KEY_SIZE_B64)
        # print "debugging: iv key writefile"
        # print enc_key
        print "Decrypting file..."
        key = self.client_id.decryptData(enc_key)
        enc_iv = enc_file.read(IV_KEY_SIZE_B64)
        #print enc_iv
        iv = self.client_id.decryptData(enc_iv)
        #print iv
        self.client_id.decryptFileSym(enc_file, dec_file, key, iv)
        print "File written."

    # for get file
    def handleGetFile(self, s):
        def handleGetFile_cb(response, f):
            finished = Deferred()
            finished.addCallback(self.writeFile_cb, s)
            cons = FileConsumer(f)
            response.deliverBody(FileDownload(finished, cons))
            print "Downloading file..."
            return finished

        fileId = s[2]
        args = ("getfile", str(self.ccid), str(fileId))
        salt = self.processCookie("/files")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/files/?method=getfile&ccid=' +
            self.ccid + '&fileid=' + str(fileId), headers, body)
        f = open(fileId, "w")
        d.addCallback(handleGetFile_cb, f)
        return NOT_DONE_YET
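    # The FileDownload/FileConsumer pair is used here (rather than, say,
    # twisted.web.client.downloadPage) so the transfer goes through the
    # CookieAgent session and the decryption step in writeFile_cb runs once
    # the download completes.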

    # for get shared
    def handleGetShared(self, s):
        def handleGetShared_cb(response, f):
            finished = Deferred()
            finished.addCallback(self.writeFile_cb, s)
            cons = FileConsumer(f)
            response.deliverBody(FileDownload(finished, cons))
            print "Downloading file..."
            return finished

        fileId = s[2]
        args = ("getshared", str(self.ccid), str(fileId))
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET', 'http://localhost:8000/shares/?method=getshared&ccid=' +
            self.ccid + '&fileid=' + fileId, headers, body)
        f = open(fileId, "w")
        d.addCallback(handleGetShared_cb, f)
        return NOT_DONE_YET

# Put Operations
#
# printPutReply_cb: prints put and update responses

    def printPutReply_cb(self, response):
        print "Done."

        defer = Deferred()
        response.deliverBody(DataPrinter(defer, "getmdata"))
        return NOT_DONE_YET

    # handlePutFile: handles file upload
    def handlePutFile(self, line):
        print "Encrypting file..."
        s = line.split()
        file = open(s[2], 'r')
        enc_file = open("enc_fileout", 'w')
        crd = self.client_id.encryptFileSym(file, enc_file)

        args = ("putfile", str(self.ccid), os.path.basename(s[2]))
        salt = self.processCookie("/files")

        dataq = []
        dataq.append(self.client_id.genHashArgs(args, salt))
        dataq.append(self.client_id.encryptData(crd[0],
                                                self.client_id.pub_key))
        dataq.append(self.client_id.encryptData(crd[1]))
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        #print crd[1]
        # print "debugging:key, iv putfile"
        # print dataq[1]
        # print len(dataq[1])
        # print dataq[2]
        # print len(dataq[2])
        print "Uploading file..."
        enc_file = open("enc_fileout", 'r')
        body = _FileProducer(enc_file, dataq)
        headers = http_headers.Headers()
        d = agent.request(
            'PUT', 'http://localhost:8000/files/?method=putfile&ccid=' +
            self.ccid + "&name=" + os.path.basename(s[2]), headers, body)
        d.addCallback(self.printPutReply_cb)

        return NOT_DONE_YET

# Update Operations
#
# handleUpdate: handles update commands

    def handleUpdate(self, s):
        def encryptFile_cb(data):  #TODO: Some error checking here.
            def updateFile_cb(iv):
                #data = (key,)
                print "Updating file..."

                args = ("updatefile", str(self.ccid), os.path.basename(s[3]),
                        s[2])
                salt = self.processCookie("/files")

                dataq = []
                dataq.append(self.client_id.genHashArgs(args, salt))
                dataq.append(iv)
                # print "debugging:ticket, iv updatefile"
                # print dataq[0]
                # print dataq[1]
                # print len(dataq[1])
                agent = CookieAgent(Agent(reactor), self.cookie_jar)
                print "Uploading file..."
                enc_file = open("enc_fileout", 'r')
                body = _FileProducer(enc_file, dataq)
                headers = http_headers.Headers()
                d = agent.request(
                    'POST',
                    'http://localhost:8000/files/?method=updatefile&ccid=' +
                    self.ccid + "&name=" + os.path.basename(s[3]) +
                    "&fileid=" + s[2], headers, body)
                d.addCallback(self.printPutReply_cb)

                return NOT_DONE_YET

            def updateShared_cb(iv):
                print "Updating file..."

                args = ("updateshared", str(self.ccid), os.path.basename(s[3]),
                        s[2])
                salt = self.processCookie("/shares")

                dataq = []
                dataq.append(self.client_id.genHashArgs(args, salt))
                dataq.append(iv)
                # print "debugging:ticket, iv updatefile"
                # print dataq[0]
                # print dataq[1]
                # print len(dataq[1])
                print "Uploading file..."
                agent = CookieAgent(Agent(reactor), self.cookie_jar)
                enc_file = open("enc_fileout", 'r')
                body = _FileProducer(enc_file, dataq)
                headers = http_headers.Headers()
                d = agent.request(
                    'POST',
                    'http://localhost:8000/shares/?method=updateshared&ccid=' +
                    self.ccid + "&name=" + os.path.basename(s[3]) +
                    "&fileid=" + s[2], headers, body)
                d.addCallback(self.printPutReply_cb)

                return NOT_DONE_YET

            if isinstance(data, basestring):
                print data
                return

            print "Encrypting file..."
            #print data["data"]["SymKey"]
            enc_key = data["data"]["SymKey"]
            key = self.client_id.decryptData(enc_key, self.client_id.priv_key)
            #print len(key)
            file = open(s[3], 'r')
            enc_file = open("enc_fileout", 'w')
            crd = self.client_id.encryptFileSym(file, enc_file, key=key)
            new_iv = self.client_id.encryptData(crd[1])
            if s[1] == "shared":
                return updateShared_cb(new_iv)
            return updateFile_cb(new_iv)

        hsmd_data = (encryptFile_cb, s[2])
        if s[1] == "file":
            return self.handleGetFileMData(hsmd_data)
        return self.handleGetShareMData(hsmd_data)

    def handleUpdateSharePerm(self, s):
        args = ("updateshareperm", str(self.ccid), s[3], s[2], s[4])
        salt = self.processCookie("/shares")
        body = FileBodyProducer(
            StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'POST',
            'http://localhost:8000/shares/?method=updateshareperm&ccid=' +
            self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2] + "&writeable=" +
            s[4], headers, body)
        d.addCallback(self.printPutReply_cb)

        return NOT_DONE_YET

# Delete Operations
#
# handleDelete: handles delete commands

    def handleDelete(self, line):
        def printDeleteReply_cb(data):
            # The server's reply carries no useful payload either way.
            print "Done."

        def deleteFile_cb():
            args = ("delete", str(self.ccid), s[2])
            salt = self.processCookie("/files")
            body = FileBodyProducer(
                StringIO(self.client_id.genHashArgs(args, salt)))

            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            headers = http_headers.Headers()
            d = agent.request(
                'DELETE', 'http://localhost:8000/files/?method=delete&ccid=' +
                self.ccid + "&fileid=" + s[2], headers, body)

            d.addCallback(printDeleteReply_cb)

        def deleteShare_cb():
            args = ("delete", str(self.ccid), s[2], s[3])
            salt = self.processCookie("/shares")
            body = FileBodyProducer(
                StringIO(self.client_id.genHashArgs(args, salt)))

            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            headers = http_headers.Headers()
            d = agent.request(
                'DELETE', 'http://localhost:8000/shares/?method=delete&ccid=' +
                self.ccid + "&fileid=" + s[2] + "&rccid=" + s[3], headers,
                body)

            d.addCallback(printDeleteReply_cb)

        s = line.split()
        if len(s) == 4:
            return deleteShare_cb()
        if len(s) == 3:
            return deleteFile_cb()

        print "Error: invalid arguments!\n"
        print "Usage: delete <file|share> <fileid> <None|rccid>"
        return

# Share Operation
#

    def handleShare(self, line):
        def getFKey_cb(data):
            enc_key = data["data"]["SymKey"]

            def getDstKey_cb(data):
                dstkey = data["data"]["PubKey"]
                print "pubkey" + dstkey

                def shareFile_cb():
                    args = ("delete", str(self.ccid), s[3], s[2])
                    salt = self.processCookie("/shares")

                    dataq = []
                    dataq.append(self.client_id.genHashArgs(args, salt))
                    dataq.append(enc_sym_key)
                    print "Uploading symkey..."

                    agent = CookieAgent(Agent(reactor), self.cookie_jar)
                    body = _FileProducer(StringIO(""), dataq)
                    headers = http_headers.Headers()
                    d = agent.request(
                        'PUT',
                        'http://localhost:8000/shares/?method=sharefile&ccid='
                        + self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2],
                        headers, body)
                    d.addCallback(self.printPutReply_cb)

                    return d

                #enc_key = data["data"]["SymKey"]
                sym_key = self.client_id.decryptData(enc_key,
                                                     self.client_id.priv_key)
                dstkey = RSA.importKey(dstkey)
                enc_sym_key = self.client_id.encryptData(sym_key, dstkey)
                return shareFile_cb()

            hfmd_data = (getDstKey_cb, s[3].lower())
            return self.handleGetMData(hfmd_data)

        s = line.split()
        if len(s) == 4 and s[1].lower() == "file":
            hmd_data = (getFKey_cb, s[2].lower())
            return self.handleGetFileMData(hmd_data)

        print "Error: invalid arguments!\n"
        print "Usage: share file <fileid> <recipient's ccid>"
        return
Example 15
class ControlPlaneClient(object):
    """
    A thin JSON-over-HTTP client for the control plane REST API; the
    CookieJar on the opener carries the login session between requests.
    """

    def __init__(self, user, password, host=None, port=None):
        """
        Builds a cookie-aware urllib2 opener and stores the server address
        and credentials for later logins.
        """
        self._cj = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(self._cj)
        )
        self._server = {
            "host": host if host else _DEFAULT_HOST,
            "port": port if port else _DEFAULT_PORT,
        }
        self._creds = {"username": user, "password": password}
        self._netloc = "%(host)s:%(port)s" % self._server

    def queryServices(self, name=None, tags=None):
        """
        Returns a sequence of ServiceDefinition objects that match
        the given requirements.
        """
        query = {}
        if name:
            namepat = fnmatch.translate(name)
            # controlplane regex accepts \z, not \Z.
            namepat = namepat.replace("\\Z", "\\z")
            query["name"] = namepat
        if tags:
            if isinstance(tags, (str, unicode)):
                tags = [tags]
            query["tags"] = ','.join(tags)
        response = self._dorequest("/services", query=query)
        body = ''.join(response.readlines())
        response.close()
        decoded = ServiceJsonDecoder().decode(body)
        if decoded is None:
            decoded = []
        return decoded

    def getService(self, serviceId, default=None):
        """
        Returns the ServiceDefinition object for the given service.
        """
        response = self._dorequest("/services/%s" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def updateService(self, service):
        """
        Updates the definition/state of a service.

        :param ServiceDefinition service: The modified definition
        """
        body = ServiceJsonEncoder().encode(service)
        response = self._dorequest(
            service.resourceId, method="PUT", data=body
        )
        body = ''.join(response.readlines())
        response.close()

    def queryServiceInstances(self, serviceId):
        """
        Returns a sequence of ServiceInstance objects.
        """
        response = self._dorequest("/services/%s/running" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def getInstance(self, serviceId, instanceId, default=None):
        """
        Returns the requested ServiceInstance object.
        """
        response = self._dorequest(
            "/services/%s/running/%s" % (serviceId, instanceId)
        )
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def getServiceLog(self, serviceId, start=0, end=None):
        """
        Returns the log for the given service.
        """
        response = self._dorequest("/services/%s/logs" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        log = json.loads(body)
        return log["Detail"]

    def getInstanceLog(self, serviceId, instanceId, start=0, end=None):
        """
        Returns the log for the given service instance.
        """
        response = self._dorequest(
            "/services/%s/%s/logs" % (serviceId, instanceId)
        )
        body = ''.join(response.readlines())
        response.close()
        log = json.loads(body)
        return str(log["Detail"])

    def killInstance(self, hostId, instanceId):
        """
        Kills the given service instance on the given host.
        """
        response = self._dorequest(
            "/hosts/%s/%s" % (hostId, instanceId), method="DELETE"
        )
        response.close()

    def _makeRequest(self, uri, method=None, data=None, query=None):
        query = urllib.urlencode(query) if query else ""
        url = urlunparse(("http", self._netloc, uri, "", query, ""))
        args = {}
        if method:
            args["method"] = method
        if data:
            args["data"] = data
            args["headers"] = {"Content-Type": "application/json"}
        return _Request(url, **args)

    def _login(self):
        # Clear the cookie jar before logging in.
        self._cj.clear()
        encodedbody = json.dumps(self._creds)
        request = self._makeRequest("/login", data=encodedbody)
        response = self._opener.open(request)
        response.close()
        self._opener.close()

    def _dorequest(self, uri, method=None, data=None, query=None):
        request = self._makeRequest(
            uri, method=method, data=data, query=query)
        # Try the request up to five times, re-authenticating whenever the
        # server answers 401; any other HTTP error propagates immediately.
        lasterror = None
        for trycount in range(5):
            try:
                return self._opener.open(request)
            except urllib2.HTTPError as ex:
                if ex.getcode() != 401:
                    raise
                lasterror = ex
                self._login()
        # All attempts came back 401; re-raise the last error.
        raise lasterror
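# A minimal sketch of the same retry-on-401 idea with plain urllib2. The
# host, paths and credential format below are placeholders, not the real
# control plane API:
import json
import urllib2
from cookielib import CookieJar

jar = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))

def login():
    jar.clear()  # drop any stale session cookie before re-authenticating
    req = urllib2.Request(
        "http://localhost:8080/login",
        json.dumps({"username": "admin", "password": "secret"}),
        {"Content-Type": "application/json"})
    opener.open(req).close()

def dorequest(url, retries=5):
    for _ in range(retries):
        try:
            return opener.open(url)
        except urllib2.HTTPError as ex:
            if ex.getcode() != 401:
                raise
            login()  # session expired; log in again and retry
    raise RuntimeError("still unauthorized after %d attempts" % retries)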
Example 16
        ppft = re.search('<input type="hidden" name="PPFT" id="[^"]+" value="([^"]+)"', html).group(1)
        action = re.search("var srf_uPost='([^']+)'", html).group(1)

        postData = dict(re.findall("var srf_s([^=]+)='([^']+)';", html))

        html = urllib2.urlopen(action, urllib.urlencode({
            'PPFT': ppft,
            'login': email,
            'passwd': password,
            }).encode()).read().decode('utf-8')

        if html.find('replace("http://mail.live.com/default.aspx?rru=inbox")') == -1:
            print "Can't login into HotMail with: %s - %s" % (name, password)
        else:
        #        cookie.clear(domain='.mail.live.com', path='/', name='KVC')
            cookie.clear(domain='.live.com', path='/', name='WLSSC')

            print 'Logged in. Redirecting to email inbox...', 
            nexturl = 'http://mail.live.com/default.aspx?rru=inbox'
            inboxURL = ''
            while nexturl:
                try:
                    print '.', 
                    #                    print "\tRedirecting to %s" % nexturl
                    req = urllib2.urlopen(nexturl)
                    html = req.read().decode('utf-8')
                    inboxURL = req.url
                    nexturl = False
                except urllib2.HTTPError as e:
                    print e.read()
                    nexturl = urlparse.urljoin(nexturl, e.headers['location'])
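# For reference, cookielib.CookieJar.clear() removes cookies selectively:
#     cj.clear('.live.com', '/', 'WLSSC')   # one named cookie
#     cj.clear('.live.com')                 # every cookie for the domain
#     cj.clear()                            # empty the whole jar
# When a name is given, domain and path must be given as well, otherwise
# clear() raises ValueError.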
Example 17
class HttpPostPublisher(BasePublisher):
    """
    Publish metrics via HTTP POST
    """

    def __init__(self,
                 username,
                 password,
                 url='https://localhost:8443/api/metrics/store',
                 buflen=defaultMetricBufferSize,
                 pubfreq=defaultPublishFrequency):
        super(HttpPostPublisher, self).__init__(buflen, pubfreq)
        self._username = username
        self._password = password
        self._needsAuth = False
        self._authenticated = False
        if self._username:
            self._needsAuth = True
        self._cookieJar = CookieJar()
        self._agent = CookieAgent(Agent(reactor), self._cookieJar)
        self._url = url
        # rstrip(".py") would strip characters, not the suffix, so trim it explicitly.
        self._agent_suffix = os.path.basename(sys.argv[0]) if sys.argv[0] else "python"
        if self._agent_suffix.endswith(".py"):
            self._agent_suffix = self._agent_suffix[:-3]
        reactor.addSystemEventTrigger('before', 'shutdown', self._shutdown)

    def _metrics_published(self, response, llen, remaining=0):
        if response.code != 200:
            if response.code == UNAUTHORIZED:
                self._authenticated = False
                self._cookieJar.clear()
            raise IOError("Expected HTTP 200, but received %d from %s" % (response.code, self._url))

        if self._needsAuth:
            self._authenticated = True
        log.debug("published %d metrics and received response: %s",
                  llen, response.code)
        finished = defer.Deferred()
        response.deliverBody(ResponseReceiver(finished))
        if remaining:
            reactor.callLater(0, self._put, False)
        return finished

    def _response_finished(self, result):
        # The most likely result is the HTTP response from a successful POST,
        # which should be JSON formatted.
        if isinstance(result, str):
            log.debug("response was: %s", json.loads(result))
        # We could be called back because _publish_failed was called before us
        elif isinstance(result, int):
            log.info("queue still contains %d metrics", result)
        # Or something strange could have happened
        else:
            log.warn("Unexpected result: %s", result)

    def _shutdown(self):
        log.debug('shutting down [publishing]')
        if len(self._mq):
            self._make_request()

    def _make_request(self):
        metrics = []
        for x in xrange(HTTP_BATCH):
            if not self._mq:
                break
            metrics.append(self._mq.popleft())
        if not metrics:
            return defer.succeed(None)

        serialized_metrics = json.dumps({"metrics": metrics})
        body_writer = StringProducer(serialized_metrics)

        headers = Headers({
            'User-Agent': ['Zenoss Metric Publisher: %s' % self._agent_suffix],
            'Content-Type': ['application/json']})

        if self._needsAuth and not self._authenticated:
            log.info("Adding auth for metric http post %s", self._url)
            headers.addRawHeader('Authorization',
                                 basic_auth_string_content(self._username, self._password))

        d = self._agent.request('POST', self._url, headers, body_writer)

        d.addCallbacks(self._metrics_published,
                       errback=self._publish_failed,
                       callbackArgs=[len(metrics), len(self._mq)],
                       errbackArgs=[metrics])
        d.addCallbacks(self._response_finished,
                       errback=self._publish_failed,
                       errbackArgs=[metrics])

        return d

    def _put(self, scheduled):
        """
        Push the buffered metrics to the metrics service via HTTP POST.
        @param scheduled: scheduled invocation?
        """
        if scheduled:
            self._reschedule_pubtask(scheduled)

        if len(self._mq) == 0:
            return defer.succeed(0)

        log.debug('trying to publish %d metrics', len(self._mq))
        return self._make_request()
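# basic_auth_string_content() used above is a helper defined elsewhere in
# this project; a minimal equivalent for the standard Basic scheme might be:
import base64

def basic_auth_value(username, password):
    # RFC 2617: "Basic" SP base64("user:password")
    return "Basic %s" % base64.b64encode("%s:%s" % (username, password))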
Example 18
class Bugz:
    """ Converts sane method calls to Bugzilla HTTP requests.

	@ivar base: base url of bugzilla.
	@ivar user: username for authenticated operations.
	@ivar password: password for authenticated operations
	@ivar cookiejar: for authenticated sessions so we only auth once.
	@ivar forget: forget user/password after session.
	@ivar authenticated: is this session authenticated already
	"""
    def __init__(self,
                 base,
                 user=None,
                 password=None,
                 forget=False,
                 skip_auth=False,
                 httpuser=None,
                 httppassword=None):
        """
		{user} and {password} will be prompted if an action needs them
		and they are not supplied.

		if {forget} is set, the login cookie will be destroyed on quit.

		@param base: base url of the bugzilla
		@type  base: string
		@keyword user: username for authenticated actions.
		@type    user: string
		@keyword password: password for authenticated actions.
		@type    password: string
		@keyword forget: forget login session after termination.
		@type    forget: bool
		@keyword skip_auth: do not authenticate
		@type    skip_auth: bool
		"""
        self.base = base
        scheme, self.host, self.path, query, frag = urlsplit(self.base)
        self.authenticated = False
        self.forget = forget

        if not self.forget:
            try:
                cookie_file = os.path.join(os.environ['HOME'], COOKIE_FILE)
                self.cookiejar = LWPCookieJar(cookie_file)
            except KeyError:
                # HOME is not set, so there is nowhere to persist cookies;
                # fall back to an in-memory jar.
                self.warn('Unable to save session cookies in %s' % COOKIE_FILE)
                self.cookiejar = CookieJar()
        else:
            # forget is set: keep the session cookie in memory only, so it
            # is discarded when the process exits.
            self.cookiejar = CookieJar()

        self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
        self.user = user
        self.password = password
        self.httpuser = httpuser
        self.httppassword = httppassword
        self.skip_auth = skip_auth

    def log(self, status_msg):
        """Default logging handler. Expected to be overridden by
		the UI implementing subclass.

		@param status_msg: status message to print
		@type  status_msg: string
		"""
        return

    def warn(self, warn_msg):
        """Default logging handler. Expected to be overridden by
		the UI implementing subclass.

		@param status_msg: status message to print
		@type  status_msg: string
		"""
        return

    def get_input(self, prompt):
        """Default input handler. Expected to be override by the
		UI implementing subclass.

		@param prompt: Prompt message
		@type  prompt: string
		"""
        return ''

    def auth(self):
        """Authenticate a session.
		"""
        # check if we need to authenticate
        if self.authenticated:
            return

        # try seeing if we really need to request login
        if not self.forget:
            try:
                self.cookiejar.load()
            except IOError:
                pass

        req_url = urljoin(self.base, config.urls['auth'])
        req_url += '?GoAheadAndLogIn=1'
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        re_request_login = re.compile(r'<title>.*Log in to .*</title>')
        if not re_request_login.search(resp.read()):
            self.log('Already logged in.')
            self.authenticated = True
            return

        # prompt for username if we were not supplied with it
        if not self.user:
            self.log('No username given.')
            self.user = self.get_input('Username: ')

        # prompt for password if we were not supplied with it
        if not self.password:
            self.log('No password given.')
            self.password = getpass.getpass()

        # perform login
        qparams = config.params['auth'].copy()
        qparams['Bugzilla_login'] = self.user
        qparams['Bugzilla_password'] = self.password
        if not self.forget:
            qparams['Bugzilla_remember'] = 'on'

        req_url = urljoin(self.base, config.urls['auth'])
        req = Request(req_url, urlencode(qparams), config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        if resp.info().has_key('Set-Cookie'):
            self.authenticated = True
            if not self.forget:
                self.cookiejar.save()
                os.chmod(self.cookiejar.filename, 0600)
            return True
        else:
            raise RuntimeError("Failed to login")

    def extractResults(self, resp):
        # parse the results into dicts.
        results = []
        columns = []
        rows = []

        for r in csv.reader(resp):
            rows.append(r)
        for field in rows[0]:
            if config.choices['column_alias'].has_key(field):
                columns.append(config.choices['column_alias'][field])
            else:
                self.log('Unknown field: ' + field)
                columns.append(field)
        for row in rows[1:]:
            if "Missing Search" in row[0]:
                self.log('Bugzilla error (Missing search found)')
                return None
            fields = {}
            for i in range(min(len(row), len(columns))):
                fields[columns[i]] = row[i]
            results.append(fields)
        return results
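    # e.g. CSV rows [['bug_id', 'short_desc'], ['42', 'crash on start']]
    # become [{'id': '42', 'desc': 'crash on start'}], assuming the
    # column_alias config maps 'bug_id' -> 'id' and 'short_desc' -> 'desc'.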

    def search(self,
               query,
               comments=False,
               order='number',
               assigned_to=None,
               reporter=None,
               cc=None,
               commenter=None,
               whiteboard=None,
               keywords=None,
               status=[],
               severity=[],
               priority=[],
               product=[],
               component=[]):
        """Search bugzilla for a bug.

		@param query: query string to search in title or {comments}.
		@type  query: string
		@param order: what order to returns bugs in.
		@type  order: string

		@keyword assigned_to: email address which the bug is assigned to.
		@type    assigned_to: string
		@keyword reporter: email address matching the bug reporter.
		@type    reporter: string
		@keyword cc: email that is contained in the CC list
		@type    cc: string
		@keyword commenter: email of a commenter.
		@type    commenter: string

		@keyword whiteboard: string to search in status whiteboard (gentoo?)
		@type    whiteboard: string
		@keyword keywords: keyword to search for
		@type    keywords: string

		@keyword status: bug status to match. default is ['NEW', 'ASSIGNED',
						 'REOPENED'].
		@type    status: list
		@keyword severity: severity to match, empty means all.
		@type    severity: list
		@keyword priority: priority levels to patch, empty means all.
		@type    priority: list
		@keyword comments: search comments instead of just bug title.
		@type    comments: bool
		@keyword product: search within products. empty means all.
		@type    product: list
		@keyword component: search within components. empty means all.
		@type    component: list

		@return: list of bugs, each bug represented as a dict
		@rtype: list of dicts
		"""

        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['list'].copy()
        qparams['value0-0-0'] = query
        if comments:
            qparams['type0-0-1'] = qparams['type0-0-0']
            qparams['value0-0-1'] = query

        qparams['order'] = config.choices['order'].get(order, 'Bug Number')
        qparams['bug_severity'] = severity or []
        qparams['priority'] = priority or []
        if status is None:
            # NEW, ASSIGNED and REOPENED is obsolete as of bugzilla 3.x and has
            # been removed from bugs.gentoo.org on 2011/05/01
            qparams['bug_status'] = [
                'NEW', 'ASSIGNED', 'REOPENED', 'UNCONFIRMED', 'CONFIRMED',
                'IN_PROGRESS'
            ]
        elif [s.upper() for s in status] == ['ALL']:
            qparams['bug_status'] = config.choices['status']
        else:
            qparams['bug_status'] = [s.upper() for s in status]
        qparams['product'] = product or ''
        qparams['component'] = component or ''
        qparams['status_whiteboard'] = whiteboard or ''
        qparams['keywords'] = keywords or ''

        # hoops to jump through for emails, since there are
        # only two fields, we have to figure out what combinations
        # to use if all three are set.
        unique = list(set([assigned_to, cc, reporter, commenter]))
        unique = [u for u in unique if u]
        if len(unique) < 3:
            for i in range(len(unique)):
                e = unique[i]
                n = i + 1
                qparams['email%d' % n] = e
                qparams['emailassigned_to%d' % n] = int(e == assigned_to)
                qparams['emailreporter%d' % n] = int(e == reporter)
                qparams['emailcc%d' % n] = int(e == cc)
                qparams['emaillongdesc%d' % n] = int(e == commenter)
        else:
            raise AssertionError('Cannot set assigned_to, cc, and '
                                 'reporter in the same query')
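        # e.g. assigned_to='dev@x.org', cc='dev@x.org', reporter='qa@x.org'
        # yields unique == ['dev@x.org', 'qa@x.org'], so one email slot
        # carries dev@x.org with both emailassigned_to and emailcc set to 1,
        # and the other carries qa@x.org with only emailreporter set to 1.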

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['list'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        return self.extractResults(resp)

    def namedcmd(self, cmd):
        """Run command stored in Bugzilla by name.

		@return: Result from the stored command.
		@rtype: list of dicts
		"""

        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['namedcmd'].copy()
        # Is there a better way of getting a command with a space in its name
        # to be encoded as foo%20bar instead of foo+bar or foo%2520bar?
        qparams['namedcmd'] = quote(cmd)
        req_params = urlencode(qparams, True)
        req_params = req_params.replace('%25', '%')

        req_url = urljoin(self.base, config.urls['list'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.user and self.password:
            base64string = base64.encodestring('%s:%s' %
                                               (self.user, self.password))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        return self.extractResults(resp)

    def get(self, bugid):
        """Get an ElementTree representation of a bug.

		@param bugid: bug id
		@type  bugid: int

		@rtype: ElementTree
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['show'].copy()
        qparams['id'] = bugid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['show'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        data = resp.read()
        # Get rid of control characters.
        data = re.sub('[\x00-\x08\x0e-\x1f\x0b\x0c]', '', data)
        fd = StringIO(data)

        # workaround for ill-defined XML templates in bugzilla 2.20.2
        (major_version, minor_version) = \
            (sys.version_info[0], sys.version_info[1])
        if major_version > 2 or \
             (major_version == 2 and minor_version >= 7):
            # If this is 2.7 or greater, then XMLTreeBuilder
            # does what we want.
            parser = ElementTree.XMLParser()
        else:
            # Running under Python 2.6, so we need to use our
            # subclass of XMLTreeBuilder instead.
            parser = ForcedEncodingXMLTreeBuilder(encoding='utf-8')

        etree = ElementTree.parse(fd, parser)
        bug = etree.find('.//bug')
        if bug is not None and bug.attrib.has_key('error'):
            return None
        else:
            return etree

    def modify(self,
               bugid,
               title=None,
               comment=None,
               url=None,
               status=None,
               resolution=None,
               assigned_to=None,
               duplicate=0,
               priority=None,
               severity=None,
               add_cc=[],
               remove_cc=[],
               add_dependson=[],
               remove_dependson=[],
               add_blocked=[],
               remove_blocked=[],
               whiteboard=None,
               keywords=None,
               component=None):
        """Modify an existing bug

		@param bugid: bug id
		@type  bugid: int
		@keyword title: new title for bug
		@type    title: string
		@keyword comment: comment to add
		@type    comment: string
		@keyword url: new url
		@type    url: string
		@keyword status: new status (note, if you are changing it to RESOLVED, you need to set {resolution} as well.
		@type    status: string
		@keyword resolution: new resolution (if status=RESOLVED)
		@type    resolution: string
		@keyword assigned_to: email (needs to exist in bugzilla)
		@type    assigned_to: string
		@keyword duplicate: bug id to duplicate against (if resolution = DUPLICATE)
		@type    duplicate: int
		@keyword priority: new priority for bug
		@type    priority: string
		@keyword severity: new severity for bug
		@type    severity: string
		@keyword add_cc: list of emails to add to the cc list
		@type    add_cc: list of strings
		@keyword remove_cc: list of emails to remove from cc list
		@type    remove_cc: list of string.
		@keyword add_dependson: list of bug ids to add to the depend list
		@type    add_dependson: list of strings
		@keyword remove_dependson: list of bug ids to remove from depend list
		@type    remove_dependson: list of strings
		@keyword add_blocked: list of bug ids to add to the blocked list
		@type    add_blocked: list of strings
		@keyword remove_blocked: list of bug ids to remove from blocked list
		@type    remove_blocked: list of strings

		@keyword whiteboard: set status whiteboard
		@type    whiteboard: string
		@keyword keywords: set keywords
		@type    keywords: string
		@keyword component: set component
		@type    component: string

		@return: list of fields modified.
		@rtype: list of strings
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        buginfo = Bugz.get(self, bugid)
        if not buginfo:
            return False

        modified = []
        qparams = config.params['modify'].copy()
        qparams['id'] = bugid
        # NOTE: knob has been removed in bugzilla 4 and 3?
        qparams['knob'] = 'none'

        # copy existing fields
        FIELDS = ('bug_file_loc', 'bug_severity', 'short_desc', 'bug_status',
                  'status_whiteboard', 'keywords', 'resolution', 'op_sys',
                  'priority', 'version', 'target_milestone', 'assigned_to',
                  'rep_platform', 'product', 'component', 'token')

        FIELDS_MULTI = ('blocked', 'dependson')

        for field in FIELDS:
            try:
                qparams[field] = buginfo.find('.//%s' % field).text
                if qparams[field] is None:
                    del qparams[field]
            except AttributeError:
                # the field is not present in the bug XML
                pass

        for field in FIELDS_MULTI:
            qparams[field] = [
                d.text for d in buginfo.findall('.//%s' % field)
                if d is not None and d.text is not None
            ]

        # set 'knob' if we are changing the status/resolution
        # or trying to reassign the bug.
        if status:
            status = status.upper()
        if resolution:
            resolution = resolution.upper()

        if status and status != qparams['bug_status']:
            # Bugzilla >= 3.x
            qparams['bug_status'] = status

            if status == 'RESOLVED':
                qparams['knob'] = 'resolve'
                if resolution:
                    qparams['resolution'] = resolution
                else:
                    qparams['resolution'] = 'FIXED'

                modified.append(('status', status))
                modified.append(('resolution', qparams['resolution']))
            elif status == 'ASSIGNED' or status == 'IN_PROGRESS':
                qparams['knob'] = 'accept'
                modified.append(('status', status))
            elif status == 'REOPENED':
                qparams['knob'] = 'reopen'
                modified.append(('status', status))
            elif status == 'VERIFIED':
                qparams['knob'] = 'verified'
                modified.append(('status', status))
            elif status == 'CLOSED':
                qparams['knob'] = 'closed'
                modified.append(('status', status))
        elif duplicate:
            # Bugzilla >= 3.x
            qparams['bug_status'] = "RESOLVED"
            qparams['resolution'] = "DUPLICATE"

            qparams['knob'] = 'duplicate'
            qparams['dup_id'] = duplicate
            modified.append(('status', 'RESOLVED'))
            modified.append(('resolution', 'DUPLICATE'))
        elif assigned_to:
            qparams['knob'] = 'reassign'
            qparams['assigned_to'] = assigned_to
            modified.append(('assigned_to', assigned_to))

        # setup modification of other bits
        if comment:
            qparams['comment'] = comment
            modified.append(('comment', ellipsis(comment, 60)))
        if title:
            qparams['short_desc'] = title or ''
            modified.append(('title', title))
        if url is not None:
            qparams['bug_file_loc'] = url
            modified.append(('url', url))
        if severity is not None:
            qparams['bug_severity'] = severity
            modified.append(('severity', severity))
        if priority is not None:
            qparams['priority'] = priority
            modified.append(('priority', priority))

        # cc manipulation
        if add_cc is not None:
            qparams['newcc'] = ', '.join(add_cc)
            modified.append(('newcc', qparams['newcc']))
        if remove_cc is not None:
            qparams['cc'] = remove_cc
            qparams['removecc'] = 'on'
            modified.append(('cc', remove_cc))

        # bug depend/blocked manipulation
        changed_dependson = False
        changed_blocked = False
        if remove_dependson:
            for bug_id in remove_dependson:
                qparams['dependson'].remove(str(bug_id))
                changed_dependson = True
        if remove_blocked:
            for bug_id in remove_blocked:
                qparams['blocked'].remove(str(bug_id))
                changed_blocked = True
        if add_dependson:
            for bug_id in add_dependson:
                qparams['dependson'].append(str(bug_id))
                changed_dependson = True
        if add_blocked:
            for bug_id in add_blocked:
                qparams['blocked'].append(str(bug_id))
                changed_blocked = True

        qparams['dependson'] = ','.join(qparams['dependson'])
        qparams['blocked'] = ','.join(qparams['blocked'])
        if changed_dependson:
            modified.append(('dependson', qparams['dependson']))
        if changed_blocked:
            modified.append(('blocked', qparams['blocked']))

        if whiteboard is not None:
            qparams['status_whiteboard'] = whiteboard
            modified.append(('status_whiteboard', whiteboard))
        if keywords is not None:
            qparams['keywords'] = keywords
            modified.append(('keywords', keywords))
        if component is not None:
            qparams['component'] = component
            modified.append(('component', component))

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['modify'])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)

        try:
            resp = self.opener.open(req)
            re_error = re.compile(r'id="error_msg".*>([^<]+)<')
            error = re_error.search(resp.read())
            if error:
                print error.group(1)
                return []
            return modified
        except:
            return []

    def attachment(self, attachid):
        """Get an attachment by attachment_id

		@param attachid: attachment id
		@type  attachid: int

		@return: dict with three keys, 'filename', 'size', 'fd'
		@rtype: dict
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['attach'].copy()
        qparams['id'] = attachid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['attach'])
        req_url += '?' + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        try:
            content_type = resp.info()['Content-type']
            namefield = content_type.split(';')[1]
            filename = re.search(r'name=\"(.*)\"', namefield).group(1)
            content_length = int(resp.info()['Content-length'], 0)
            return {'filename': filename, 'size': content_length, 'fd': resp}
        except:
            return {}
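    # e.g. a response header of
    #     Content-type: text/plain; name="patch.diff"
    # yields {'filename': 'patch.diff', 'size': <Content-length>, 'fd': resp}.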

    def post(self,
             product,
             component,
             title,
             description,
             url='',
             assigned_to='',
             cc='',
             keywords='',
             version='',
             dependson='',
             blocked='',
             priority='',
             severity=''):
        """Post a bug

        @param product: product where the bug should be placed
        @type  product: string
        @param component: component where the bug should be placed
        @type  component: string
        @param title: title of the bug
        @type  title: string
        @param description: description of the bug
        @type  description: string
        @keyword url: optional url to submit with bug
        @type url: string
        @keyword assigned_to: optional email to assign bug to
        @type assigned_to: string
        @keyword cc: optional list of CC'd emails
        @type cc: string
        @keyword keywords: optional list of bugzilla keywords
        @type keywords: string
        @keyword version: version of the component
        @type version: string
        @keyword dependson: bugs this one depends on
        @type dependson: string
        @keyword blocked: bugs this one blocks
        @type blocked: string
        @keyword priority: priority of this bug
        @type priority: string
        @keyword severity: severity of this bug
        @type severity: string

        @rtype: int
        @return: the bug number, or 0 if submission failed.
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['post'].copy()
        qparams['product'] = product
        qparams['component'] = component
        qparams['short_desc'] = title
        qparams['comment'] = description
        qparams['assigned_to'] = assigned_to
        qparams['cc'] = cc
        qparams['bug_file_loc'] = url
        qparams['dependson'] = dependson
        qparams['blocked'] = blocked
        qparams['keywords'] = keywords

        #XXX: default version is 'unspecified'
        if version != '':
            qparams['version'] = version

        #XXX: default priority is 'Normal'
        if priority != '':
            qparams['priority'] = priority

        #XXX: default severity is 'normal'
        if severity != '':
            qparams['bug_severity'] = severity

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls['post'])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        try:
            re_bug = re.compile(
                r'(?:\s+)?<title>.*Bug ([0-9]+) Submitted.*</title>')
            bug_match = re_bug.search(resp.read())
            if bug_match:
                return int(bug_match.group(1))
        except Exception:
            pass

        return 0
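
    # A minimal sketch of posting a bug and checking the result (the instance
    # and literal values below are made up):
    #
    #   bugid = client.post('MyProduct', 'general', 'crash on start',
    #                       'steps to reproduce: ...', severity='major')
    #   if bugid == 0:
    #       print 'submission failed'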

    def attach(self,
               bugid,
               title,
               description,
               filename,
               content_type='text/plain',
               ispatch=False):
        """Attach a file to a bug.

        @param bugid: bug id
        @type  bugid: int
        @param title: short description of attachment
        @type  title: string
        @param description: long description of the attachment
        @type  description: string
        @param filename: filename of the attachment
        @type  filename: string
        @keyword content_type: mime-type of the attachment
        @type content_type: string
        @keyword ispatch: whether the attachment is a patch
        @type ispatch: bool

        @rtype: bool
        @return: True if successful, False if not successful.
        """
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params['attach_post'].copy()
        qparams['bugid'] = bugid
        qparams['description'] = title
        qparams['comment'] = description
        if ispatch:
            qparams['ispatch'] = '1'
            qparams['contenttypeentry'] = 'text/plain'
        else:
            qparams['contenttypeentry'] = content_type

        filedata = [('data', filename, open(filename, 'rb').read())]
        content_type, body = encode_multipart_formdata(qparams.items(),
                                                       filedata)

        req_headers = config.headers.copy()
        req_headers['Content-type'] = content_type
        req_headers['Content-length'] = len(body)
        req_url = urljoin(self.base, config.urls['attach_post'])
        req = Request(req_url, body, req_headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring(
                '%s:%s' % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        # TODO: return attachment id and success?
        try:
            re_attach = re.compile(r'<title>(.+)</title>')
            # Bugzilla 3/4
            re_attach34 = re.compile(r'Attachment \d+ added to Bug \d+')
            response = resp.read()
            attach_match = re_attach.search(response)
            if attach_match:
                if attach_match.group(
                        1) == "Changes Submitted" or re_attach34.match(
                            attach_match.group(1)):
                    return True
                else:
                    return attach_match.group(1)
            else:
                return False
        except Exception:
            pass

        return False
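
    # attach() can return True, an error/title string, or False; a sketch of
    # handling all three outcomes (hypothetical 'client' and bug id):
    #
    #   result = client.attach(42, 'fix', 'proposed fix', 'fix.patch',
    #                          ispatch=True)
    #   if result is True:
    #       print 'attached'
    #   elif result:
    #       print 'server said: %s' % result
    #   else:
    #       print 'attachment failed'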
Esempio n. 19
0
class Connection:

    ENCODING = 'gb18030'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
    BBS_URL = 'http://bbs.nju.edu.cn/'
    DATE_FORMAT = '%b %d %H:%M'
    LINE_WIDTH = 40
    base_url = 'http://bbs.nju.edu.cn/'

    def __init__(self, session=None):
        self._cj = CookieJar()
        self._opener = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(self._cj))
        self._opener.addheaders = [('User-Agent', self.USER_AGENT)]
        self._opener.addheaders = [('Referer', self.BBS_URL)]
        if session:
            self.load_session(session)

    def _do_action(self, action, params=None, data=None):
        args = []
        if params:
            for k, v in params.items():
                if isinstance(v, list):
                    args += ['{0}={1}'.format(k, i) for i in v]
                else:
                    args.append('{0}={1}'.format(k, v))
        url = self.base_url + action + ('?' if args else '') + '&'.join(args)
        logger.debug(url)
        body = []
        if data:
            for k, v in data.items():
                body.append('{0}={1}'.format(quote(k), quote(unicode(v).encode(self.ENCODING))))
        try:
            response = self._opener.open(url, '&'.join(body) if data else None)
        except URLError:
            raise NetworkError()
        # decode() in py2.6 does not support `errors` kwarg.
        html = response.read().decode(self.ENCODING, 'ignore')
        # TODO: BeautifulSoup still needs this?
        html = html.replace(u'<nobr>', u'')     # damn it
        return html

    def load_session(self, session):
        from utils import make_cookie
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)
        self._cj.set_cookie(make_cookie('_U_KEY', session.key))
        self._cj.set_cookie(make_cookie('_U_UID', session.uid))
        self._cj.set_cookie(make_cookie('_U_NUM', session.num))

    def is_logged_in(self):
        html = self._do_action('bbsfoot')
        return html.find('bbsqry?userid=guest') == -1

    def login(self, username, password):
        '''
        return Session if successful else None
        '''
        from random import randint
        session = Session()
        session.vd = str(randint(10000, 100000))
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)

        params = {'type': 2}
        data = {u'id': username, u'pw': password}
        html = self._do_action('bbslogin', params, data)

        try:
            s = re.search(r"setCookie\('(.*)'\)", html).group(1)
        except AttributeError:
            return None

        s = s.split('+')
        session.key = str(int(s[-1]) - 2)
        s = s[0].split('N')
        session.uid = s[-1]
        session.num = str(int(s[0]) + 2)
        self.load_session(session)
        return session
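
    # Worked example of the setCookie parsing above (all values invented):
    # if the page contains setCookie('32N1234+567'), splitting on '+' gives
    # ['32N1234', '567'], so key = str(567 - 2) = '565'; splitting '32N1234'
    # on 'N' gives uid = '1234' and num = str(32 + 2) = '34'.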

    def logout(self, session=None):
        if session:
            self.load_session(session)
        data = {'Submit': u'注销登录'.encode(self.ENCODING)}  # u'注销登录' = "log out"
        self._do_action('bbslogout', None, data)
        self._cj.clear()
        self.base_url = self.BBS_URL

    def compose(self, board, title, body, pid=None, gid=None, signature=0):
        '''
        XXX: unicode
        '''
        params = {'board': board}
        lines = body.split(u'\r\n')
        body = []
        for i in lines:
            body.append(u'\r\n'.join(wrap(i, self.LINE_WIDTH)))
        body = u'\r\n'.join(body)
        data = {'title': title,
                'text': body}
        if pid is not None:
            data['reid'] = pid
            data['pid'] = gid
        data['signature'] = signature
        html = self._do_action('bbssnd', params, data)
        return 'Refresh' in html

    def fetch_post(self, board, pid, num):
        params = {'board': board,
                  'file': pid2str(pid),
                  'num': num}
        html = self._do_action('bbscon', params)
        soup = BeautifulSoup(html)
        txt = soup.find('textarea').text
        ret = Post(board, pid, num)
        ret.parse_post(txt)
        # TODO: works for 'x' post
        s = soup.findAll('a')[-1]['href']
        gid = parse_qs(urlparse(s).query).get('gid', None)
        if gid is not None:
            ret.gid = gid[0]
        else:
            ret.gid = None
        return ret

    def fetch_topic(self, board, pid, start=None):
        params = {'board': board, 'file': pid2str(pid)}
        if start:
            params['start'] = start
        html = self._do_action('bbstcon', params)
        soup = BeautifulSoup(html)
        ret = Topic(board, pid)
        items = soup.findAll('table', {'class': 'main'})
        if not items:
            raise ContentError()
        for i in items:
            c = i.tr.td.a['href']
            p = Post(board, parse_pid(c), parse_num(c))
            c = i.findAll('tr')[1].td.textarea.text
            p.parse_post(c)
            ret.post_list.append(p)
        for i in soup.body.center.findAll('a', recursive=False, limit=3):
            if i.text == u'本主题下30篇':  # "next 30 posts in this topic"
                ret.next_start = int(parse_href(i['href'], 'start'))
        return ret

    def fetch_page(self, board, start=None):
        params = {'board': board}
        if start:
            params['start'] = start
        html = self._do_action('bbstdoc', params)
        soup = BeautifulSoup(html)

        items = soup.findAll('tr')[1:]
        year = datetime.now().year
        ret = Page(board)
        for i in items:
            cells = i.findAll('td')
            h = Header()
            h.board = board
            try:
                h.num = int(cells[0].text) - 1
            except ValueError:
                continue
            h.author = cells[2].text.strip()
            h.date = cells[3].text.strip()
            h.date = datetime.strptime(h.date, self.DATE_FORMAT)
            h.date = h.date.replace(year=year)
            h.title = cells[4].text.strip()[2:]
            h.pid = parse_pid(cells[4].a['href'])
            tmp = cells[5].text.strip()
            if tmp.find('/') != -1:
                tmp = tmp.split('/')
                h.reply_count = int(tmp[0])
                h.view_count = int(tmp[1])
            else:
                h.view_count = int(tmp)
            ret.header_list.append(h)
        # TODO
        for i in soup.body.center.findAll('a', recursive=False):
            if i.text == u'上一页':  # "previous page"
                ret.prev_start = int(parse_href(i['href'], 'start')) - 1
        return ret

    def fetch_top10(self):
        html = self._do_action('bbstop10')
        soup = BeautifulSoup(html)
        items = soup.findAll('tr')[1:]
        ret = Page(u'全站十大')  # "site-wide top ten"
        for i in items:
            cells = i.findAll('td')
            h = Header()
            h.board = cells[1].text.strip()
            h.title = cells[2].text.strip()
            h.pid = parse_pid(cells[2].a['href'])
            h.author = cells[3].text.strip()
            h.reply_count = int(cells[4].text.strip())
            ret.header_list.append(h)
        return ret

    def fetch_hot(self):
        html = self._do_action('bbstopall')
        soup = BeautifulSoup(html)
        items = soup.findAll('tr')
        ret = []
        tmp = None
        for i in items:
            if i.img:
                tmp = []
                continue
            cells = i.findAll('td')
            if not cells[0].text:
                ret.append(tmp)
                continue
            for j in cells:
                h = Header()
                links = j.findAll('a')
                h.title = links[0].text.strip()
                h.board = links[1].text.strip()
                h.pid = parse_pid(links[0]['href'])
                tmp.append(h)
        return ret

    def fetch_favorites(self):
        html = self._do_action('bbsleft')
        soup = BeautifulSoup(html)
        div = soup.findAll('div', {'id': 'div0'})
        if not div:
            raise Error()
        items = div[0]
        items = items.findAll('a')[:-1]
        ret = [i.text for i in items]
        return ret

    def fetch_board_list(self):
        from time import sleep
        ret = BoardManager()
        for i in range(12):
            sleep(1)
            html = self._do_action('bbsboa', {'sec': i})
            soup = BeautifulSoup(html)
            try:
                # The section header looks like u'[xx区]' ("xx section").
                text = re.search(ur'\[(\w+?)区\]<hr', html, re.UNICODE).group(1)
            except AttributeError:
                # "Please do not refresh the page too quickly."
                raise ContentError(u'请勿过快刷新页面')
            section = Section(i, text)
            items = soup.findAll('tr')[1:]
            for i in items:
                cells = i.findAll('td')
                s = cells[5].text[2:]
                # Some board may have a voting in progress
                if s.endswith(u'V'):
                    s = s[:-1]
                board = Board(cells[2].text, s)
                section.board_list.append(board)
            ret.add(section)
        return ret

    def fetch_face_list(self):
        html = self._do_action('editor/face.htm', {'ptext': 'text'})
        soup = BeautifulSoup(html)
        items = soup.findAll('img')
        ret = {}
        for i in items:
            ret[i['title']] = i['src']
        with open('FaceList.json', 'w') as f:
            json.dump(ret, f)
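
# A minimal driving sketch for Connection (credentials and board name are
# placeholders):
#
#   conn = Connection()
#   session = conn.login('myuser', 'mypassword')
#   if session and conn.is_logged_in():
#       page = conn.fetch_page('some_board')
#       for h in page.header_list:
#           print h.title
#       conn.logout()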
Esempio n. 20
0
class SafeBoxClient():
    def __init__(self, server_addr="localhost:8000"):
        self.server_addr = server_addr
        self.client_id = self.ccid = self.pin = None
        self.cookie_jar = CookieJar()
        self.curr_ticket = ""

    # startClient: initializes the client's remaining attributes; this
    # implies starting a session and, if necessary, registering the client.
    def startClient(self, ccid, passwd, pin):

        # checking if client is already registered
        def checkClientReg_cb(success):
            if success == False:
                print "User not registered."
                if pin is None:
                    print "Please provide your Citizen Card for registration"
                    reactor.stop()
                    return
                else:
                    print "Registering user..."
                    return self.handleRegister()
            #pprint(self.cookie_jar.__dict__)
            print "User: "******" logged in."
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)

        # Instantiating ClientIdentity
        def startClientId_cb(key):
            self.client_id = ClientIdentity(self.ccid, passwd, key)
            self.handleStartSession(checkClientReg_cb)

        self.ccid = ccid
        if pin is not None:
            self.pin = pin
        return self.handleGetKey(startClientId_cb)
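
    # Flow sketch for startClient: handleGetKey fetches the server key,
    # startClientId_cb builds the ClientIdentity from it, handleStartSession
    # opens the session, and checkClientReg_cb falls back to handleRegister
    # when the user is not yet registered.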

# Session, Registration and Authentication related operations
#
    # handleGetKey: handles getkey operations; this happens as the
    # first step of the startClient operation.
    def handleGetKey(self, method):
        def handleGetKey_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "getkey"))
            return NOT_DONE_YET

        agent = Agent(reactor)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/session/?method=getkey',
            headers,
            None)

        d.addCallback(handleGetKey_cb)

        return NOT_DONE_YET

    # handleStartSession: handles startsession operations
    def handleStartSession(self, method):
        def procResponse_cb(response):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def startSession_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            body = _FileProducer(StringIO(self.client_id.encryptData(self.client_id.password)) ,dataq)
            headers = http_headers.Headers()
            d = agent.request(
                'PUT',
                'http://localhost:8000/session/?method=startsession&ccid='
                + self.ccid + '&nonceid=' + str(nonceid),
                headers,
                body)
            d.addCallback(procResponse_cb)
            return NOT_DONE_YET

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(startSession_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        if self.pin is not None:
            agent = Agent(reactor)
            body = FileBodyProducer(StringIO(self.client_id.pub_key.exportKey('PEM')))
            headers = http_headers.Headers()
            d = agent.request(
                'GET',
                'http://localhost:8000/session/?method=getnonce',
                headers,
                body)

            d.addCallback(getNonce_cb)

            return NOT_DONE_YET

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        body = FileBodyProducer(StringIO(self.client_id.encryptData(self.client_id.password)))
        headers = http_headers.Headers()
        d = agent.request(
            'PUT',
            'http://localhost:8000/session/?method=startsession&ccid='
            + self.ccid + '&nonceid=' + str(-1),
            headers,
            body)
        d.addCallback(procResponse_cb)
        return NOT_DONE_YET


    # handleRegister: Handles the registration process. Also part of the startClient operation.
    def handleRegister(self):
        def checkClientReg_cb(success):
            if success == False:
                print "ERROR: Couldn't register user."
                reactor.stop()
                return

            #pprint(self.cookie_jar.__dict__)
            for cookie in self.cookie_jar:
                #print cookie
                #print type(cookie)
                self.curr_ticket = self.client_id.decryptData(cookie.value)
            print "Registration Successful."
            print "User: "******" logged in."
        def procResponse_cb(response, method):
            defer = Deferred()
            defer.addCallback(method)
            response.deliverBody(DataPrinter(defer, "bool"))
            return NOT_DONE_YET

        def register_cb((signedNonce, nonceid)):
            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            dataq = []
            dataq.append(signedNonce)
            dataq.append(self.client_id.encryptData(self.client_id.password))
            # Sending the Certificate and the Sub CA to the server
            if self.pin is None:
                print "ERROR! Check the pin!"
                reactor.stop()
            cert = cc.get_certificate(cc.CERT_LABEL, self.pin)
            #print type(cert.as_pem())
            #print cert.as_pem()
            if cert is None:
                print "ERROR! Check the pin"
                reactor.stop()
            subca = cc.get_certificate(cc.SUBCA_LABEL, self.pin)
            #print type(subca.as_pem())
            #print subca.as_pem()
            if subca is None:
                print "ERROR! Check the pin"
                reactor.stop()

            enc_cert = b64encode(cert.as_pem())
            #print "cert len: ", len(enc_cert)
            dataq.append(enc_cert)
            enc_subca = b64encode(subca.as_pem())
            #print "sub ca len: ", len(enc_subca)
            dataq.append(enc_subca)
            dataq.append(self.client_id.pub_key.exportKey('PEM'))
            ext_key = self.client_id.pub_key.exportKey('PEM')
            if self.pin is None:
				print "ERROR! Check the pin or the CC"
				reactor.stop()
            signed_ext_key = cc.sign(ext_key, cc.KEY_LABEL, self.pin)
            enc_sek = b64encode(signed_ext_key)
            #print "encoded ext key: ", enc_sek
            #print "len encoded: ", len(enc_sek)
            dataq.append(enc_sek)
            body = FileProducer2(dataq)
            headers = http_headers.Headers()
            #print "Password:"******"LEN:", len(self.client_id.encryptData(self.client_id.password))
            d = agent.request(
                'PUT',
                'http://localhost:8000/pboxes/?method=register'
                + '&nonceid=' + str(nonceid),
                headers,
                body)
            d.addCallback(procResponse_cb, checkClientReg_cb)

        def getNonce_cb(response):
            defer = Deferred()
            defer.addCallback(register_cb)
            response.deliverBody(getNonce(defer, self.client_id, self.pin))
            return NOT_DONE_YET

        agent = Agent(reactor)
        body = FileBodyProducer(StringIO(self.client_id.pub_key.exportKey('PEM')))
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/session/?method=getnonce',
            headers,
            body)

        d.addCallback(getNonce_cb)
        return NOT_DONE_YET

    def processCookie(self, uri):
        dci = number.long_to_bytes(number.bytes_to_long(self.curr_ticket) + long("1", base=10))
        #print "incremented ticket", number.bytes_to_long(dci)
        self.curr_ticket = dci
        sci = self.client_id.signData(str(dci))
        enc = self.client_id.encryptData(sci)
        for cookie in self.cookie_jar:
            cookie.value = enc
            cookie.path = uri
            self.cookie_jar.clear()
            self.cookie_jar.set_cookie(cookie)
        return dci
        #print cookie
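
    # The per-request ticket protocol implemented by processCookie: the ticket
    # from the session cookie is treated as a big integer, incremented, then
    # signed and encrypted back into the (single) cookie. Every request helper
    # below uses the plain incremented value as its hash salt, e.g.:
    #
    #   salt = self.processCookie("/files")
    #   body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))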

# List Operations
#
    # handleList: handles every list command
    def handleList_cb(self, response):
        defer = Deferred()
        response.deliverBody(DataPrinter(defer, "list"))
        return NOT_DONE_YET

    def handleListPboxes(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/pboxes")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))
        #print "hashed:", self.client_id.genHashArgs(args, salt)
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/pboxes/?method=list&ccid='
            + self.ccid,
            headers,
            body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    def handleListFiles(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/files")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/files/?method=list&ccid='
            + self.ccid,
            headers,
            body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

    def handleListShares(self):
        args = ("list", str(self.ccid))
        salt = self.processCookie("/shares")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/shares/?method=list&ccid='
            + self.ccid,
            headers,
            body)
        d.addCallback(self.handleList_cb)
        return NOT_DONE_YET

# Get Operations
#
    # handleGetMData: Handles get pbox metadata operations.
    def handleGetMData(self, data):
        #data = (method, tgtccid)
        pprint(data)
        def handleGetMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/pboxes")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/pboxes/?method=get_mdata&ccid='
            + self.ccid + "&tgtccid=" + data[1],
            headers,
            body)

        d.addCallback(handleGetMData_cb)

        return NOT_DONE_YET

    # handleGetFileMData: Handles get file metadata operations.
    def handleGetFileMData(self, data):
        #data = (method, fileid)
        def handleGetFileMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/files")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/files/?method=get_mdata&ccid='
            + self.ccid + "&fileid=" + data[1],
            headers,
            body)

        d.addCallback(handleGetFileMData_cb)

        return NOT_DONE_YET

    # handleGetShareMData: Handles get share metadata operations.
    def handleGetShareMData(self, data):
        #data = (method, fileid)
        def handleGetShareMData_cb(response):
            defer = Deferred()
            defer.addCallback(data[0])
            response.deliverBody(DataPrinter(defer, "getmdata"))
            return NOT_DONE_YET

        args = ("get_mdata", str(self.ccid), data[1])
        salt = self.processCookie("/shares")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/shares/?method=get_mdata&ccid='
            + self.ccid + "&fileid=" + data[1],
            headers,
            body)

        d.addCallback(handleGetShareMData_cb)

        return NOT_DONE_YET

    # handleGet: handles get file
    #def handleGet(self, line):
    def printResult_cb(self, data):
        pprint(data) #TODO: Format this!
        return NOT_DONE_YET

    # for info requests
    def handleGetInfo(self, s):
        if s[1].lower() == "pboxinfo":
            return self.handleGetMData((self.printResult_cb, s[2].lower()))
        elif s[1].lower() == "fileinfo":
            return self.handleGetFileMData((self.printResult_cb, s[2].lower()))
        elif s[1].lower() == "shareinfo":
            return self.handleGetShareMData((self.printResult_cb, s[2].lower()))

    # Decrypt and write the file
    def writeFile_cb(self, ignore, s): #we should implement http error code checking
        fileId = s[2]
        enc_file = open(fileId, "r")
        if len(s) == 4:
            dec_file = open(s[3], "w")
        else:
            dec_file = open(fileId + "_decrypted", "w")

        enc_key = enc_file.read(IV_KEY_SIZE_B64)
        # print "debugging: iv key writefile"
        # print enc_key
        print "Decrypting file..."
        key = self.client_id.decryptData(enc_key)
        enc_iv = enc_file.read(IV_KEY_SIZE_B64)
        #print enc_iv
        iv = self.client_id.decryptData(enc_iv)
        #print iv
        self.client_id.decryptFileSym(enc_file, dec_file, key, iv)
        print "File written."

    # for get file
    def handleGetFile(self, s):
        def handleGetFile_cb(response, f):
            finished = Deferred()
            finished.addCallback(self.writeFile_cb, s)
            cons = FileConsumer(f)
            response.deliverBody(FileDownload(finished, cons))
            print "Downloading file..."
            return finished

        fileId = s[2]
        args = ("getfile", str(self.ccid), str(fileId))
        salt = self.processCookie("/files")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/files/?method=getfile&ccid=' + self.ccid
            + '&fileid=' + str(fileId),
            headers,
            body)
        f = open(fileId, "w")
        d.addCallback(handleGetFile_cb, f)
        return NOT_DONE_YET

    # for get shared
    def handleGetShared(self, s):
        def handleGetShared_cb(response, f):
            finished = Deferred()
            finished.addCallback(self.writeFile_cb, s)
            cons = FileConsumer(f)
            response.deliverBody(FileDownload(finished, cons))
            print "Downloading file..."
            return finished

        fileId = s[2]
        args = ("getshared", str(self.ccid), str(fileId))
        salt = self.processCookie("/shares")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'GET',
            'http://localhost:8000/shares/?method=getshared&ccid=' + self.ccid
            + '&fileid=' + fileId,
            headers,
            body)
        f = open(fileId, "w")
        d.addCallback(handleGetShared_cb, f)
        return NOT_DONE_YET

# Put Operations
    # printPutReply_cb: prints put and update responses
    def printPutReply_cb(self, response):
        print "Done."

        defer = Deferred()
        response.deliverBody(DataPrinter(defer, "getmdata"))
        return NOT_DONE_YET

    # handlePutFile: handles file upload
    def handlePutFile(self, line):
        print "Encrypting file..."
        s = line.split()
        file = open(s[2], 'r')
        enc_file = open("enc_fileout", 'w')
        crd = self.client_id.encryptFileSym(file, enc_file)

        args = ("putfile", str(self.ccid), os.path.basename(s[2]))
        salt = self.processCookie("/files")

        dataq = []
        dataq.append( self.client_id.genHashArgs(args, salt))
        dataq.append( self.client_id.encryptData(crd[0], self.client_id.pub_key))
        dataq.append( self.client_id.encryptData(crd[1]) )
        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        #print crd[1]
        # print "debugging:key, iv putfile"
        # print dataq[1]
        # print len(dataq[1])
        # print dataq[2]
        # print len(dataq[2])
        print "Uploading file..."
        enc_file = open("enc_fileout", 'r')
        body = _FileProducer(enc_file ,dataq)
        headers = http_headers.Headers()
        d = agent.request(
            'PUT',
            'http://localhost:8000/files/?method=putfile&ccid='
            + self.ccid + "&name=" + os.path.basename(s[2]),
            headers,
            body)
        d.addCallback(self.printPutReply_cb)

        return NOT_DONE_YET
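
    # Assumed layout of the upload body built above: _FileProducer is taken to
    # emit the queued items first (request hash, RSA-encrypted symmetric key,
    # encrypted IV) followed by the AES-encrypted file contents.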

# Update Operations
#
    #handles update commands
    def handleUpdate(self, s):
        def encryptFile_cb(data):#TODO: Some error checking here.

            def updateFile_cb(iv):
                #data = (key,)
                print "Updating file..."

                args = ("updatefile", str(self.ccid), os.path.basename(s[3]), s[2])
                salt = self.processCookie("/files")

                dataq = []
                dataq.append( self.client_id.genHashArgs(args, salt))
                dataq.append( iv )
                # print "debugging:ticket, iv updatefile"
                # print dataq[0]
                # print dataq[1]
                # print len(dataq[1])
                agent = CookieAgent(Agent(reactor), self.cookie_jar)
                print "Uploading file..."
                enc_file = open("enc_fileout", 'r')
                body = _FileProducer(enc_file ,dataq)
                headers = http_headers.Headers()
                d = agent.request(
                    'POST',
                    'http://localhost:8000/files/?method=updatefile&ccid='
                    + self.ccid + "&name=" + os.path.basename(s[3]) + "&fileid=" + s[2] ,
                    headers,
                    body)
                d.addCallback(self.printPutReply_cb)

                return NOT_DONE_YET

            def updateShared_cb(iv):
                print "Updating file..."

                args = ("updateshared", str(self.ccid), os.path.basename(s[3]), s[2])
                salt = self.processCookie("/shares")

                dataq = []
                dataq.append( self.client_id.genHashArgs(args, salt))
                dataq.append( iv )
                # print "debugging:ticket, iv updatefile"
                # print dataq[0]
                # print dataq[1]
                # print len(dataq[1])
                print "Uploading file..."
                agent = CookieAgent(Agent(reactor), self.cookie_jar)
                enc_file = open("enc_fileout", 'r')
                body = _FileProducer(enc_file ,dataq)
                headers = http_headers.Headers()
                d = agent.request(
                    'POST',
                    'http://localhost:8000/shares/?method=updateshared&ccid='
                    + self.ccid + "&name=" + os.path.basename(s[3]) + "&fileid=" + s[2] ,
                    headers,
                    body)
                d.addCallback(self.printPutReply_cb)

                return NOT_DONE_YET

            if isinstance(data, basestring):
                print data
                return

            print "Encrypting file..."
            #print data["data"]["SymKey"]
            enc_key = data["data"]["SymKey"]
            key = self.client_id.decryptData(enc_key, self.client_id.priv_key)
            #print len(key)
            file = open(s[3], 'r')
            enc_file = open("enc_fileout", 'w')
            crd = self.client_id.encryptFileSym(file, enc_file, key=key)
            new_iv =  self.client_id.encryptData(crd[1])
            if s[1] == "shared":
                return updateShared_cb(new_iv)
            return updateFile_cb(new_iv)


        hsmd_data = (encryptFile_cb, s[2])
        if s[1] == "file":
            return self.handleGetFileMData(hsmd_data)
        return self.handleGetShareMData(hsmd_data)

    def handleUpdateSharePerm(self, s):
        args = ("updateshareperm", str(self.ccid), s[3], s[2], s[4])
        salt = self.processCookie("/shares")
        body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

        agent = CookieAgent(Agent(reactor), self.cookie_jar)
        headers = http_headers.Headers()
        d = agent.request(
            'POST',
            'http://localhost:8000/shares/?method=updateshareperm&ccid='
            + self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2] + "&writeable=" + s[4] ,
            headers,
            body)
        d.addCallback(self.printPutReply_cb)

        return NOT_DONE_YET

# Delete Operations
#
    # handleDelete: handles delete commands
    def handleDelete(self, line):
        def printDeleteReply_cb(data):
            # The reply body is not inspected; just report completion.
            print "Done."

        def deleteFile_cb():
            args = ("delete", str(self.ccid), s[2])
            salt = self.processCookie("/files")
            body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            headers = http_headers.Headers()
            d = agent.request(
                'DELETE',
                'http://localhost:8000/files/?method=delete&ccid='
                + self.ccid + "&fileid=" + s[2],
                headers,
                body)

            d.addCallback(printDeleteReply_cb)

        def deleteShare_cb():
            args = ("delete", str(self.ccid), s[2], s[3])
            salt = self.processCookie("/shares")
            body = FileBodyProducer(StringIO(self.client_id.genHashArgs(args, salt)))

            agent = CookieAgent(Agent(reactor), self.cookie_jar)
            headers = http_headers.Headers()
            d = agent.request(
                'DELETE',
                'http://localhost:8000/shares/?method=delete&ccid='
                + self.ccid + "&fileid=" + s[2] + "&rccid=" + s[3],
                headers,
                body)

            d.addCallback(printDeleteReply_cb)

        s = line.split()
        if len(s) == 4:
            return deleteShare_cb()
        if len(s) == 3:
            return deleteFile_cb()

        print "Error: invalid arguments!\n"
        print "Usage: delete <file|share> <fileid> <None|rccid>"
        return


# Share Operation
#
    def handleShare(self, line):

        def getFKey_cb(data):
            enc_key = data["data"]["SymKey"]

            def getDstKey_cb(data):
                dstkey = data["data"]["PubKey"]
                print "pubkey" + dstkey

                def shareFile_cb():
                    args = ("delete", str(self.ccid), s[3], s[2])
                    salt = self.processCookie("/shares")

                    dataq = []
                    dataq.append(self.client_id.genHashArgs(args, salt))
                    dataq.append(enc_sym_key)
                    print "Uploading symkey..."

                    agent = CookieAgent(Agent(reactor), self.cookie_jar)
                    body = _FileProducer(StringIO("") ,dataq)
                    headers = http_headers.Headers()
                    d = agent.request(
                        'PUT',
                        'http://localhost:8000/shares/?method=sharefile&ccid='
                        + self.ccid + "&rccid=" + s[3] + "&fileid=" + s[2],
                        headers,
                        body)
                    d.addCallback(self.printPutReply_cb)

                    return d

                #enc_key = data["data"]["SymKey"]
                sym_key = self.client_id.decryptData(enc_key, self.client_id.priv_key)
                dstkey = RSA.importKey(dstkey)
                enc_sym_key = self.client_id.encryptData(sym_key, dstkey)
                return shareFile_cb()



            hfmd_data = (getDstKey_cb, s[3].lower())
            return self.handleGetMData(hfmd_data)

        s = line.split()
        if len(s) == 4:
            hmd_data = (getFKey_cb, s[2].lower())
            return self.handleGetFileMData(hmd_data)

        else:
            print "Error: invalid arguments!\n"
            print "Usage: share file <fileid> <recipient's ccid>"
            return
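
# A minimal startup sketch for SafeBoxClient under the Twisted reactor
# (ccid, password and pin values are placeholders):
#
#   client = SafeBoxClient()
#   reactor.callWhenRunning(client.startClient, 'cc123', 'secret', '1234')
#   reactor.run()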
Esempio n. 21
0
class Site(object):
    """
    **EarwigBot: Wiki Toolset: Site**

    Represents a site, with support for API queries and returning
    :py:class:`~earwigbot.wiki.page.Page`,
    :py:class:`~earwigbot.wiki.user.User`,
    and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor
    takes a bunch of arguments and you probably won't need to call it directly,
    rather :py:meth:`wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`
    for returning :py:class:`Site`
    instances, :py:meth:`wiki.add_site()
    <earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our
    database, and :py:meth:`wiki.remove_site()
    <earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from
    our database, should suffice.

    *Attributes:*

    - :py:attr:`name`:    the site's name (or "wikiid"), like ``"enwiki"``
    - :py:attr:`project`: the site's project name, like ``"wikipedia"``
    - :py:attr:`lang`:    the site's language code, like ``"en"``
    - :py:attr:`domain`:  the site's web domain, like ``"en.wikipedia.org"``
    - :py:attr:`url`:     the site's URL, like ``"https://en.wikipedia.org"``

    *Public methods:*

    - :py:meth:`api_query`:            does an API query with kwargs as params
    - :py:meth:`sql_query`:            does an SQL query and yields its results
    - :py:meth:`get_maxlag`:           returns the internal database lag
    - :py:meth:`get_replag`:           estimates the external database lag
    - :py:meth:`namespace_id_to_name`: returns names associated with an NS id
    - :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
    - :py:meth:`get_page`:             returns a Page for the given title
    - :py:meth:`get_category`:         returns a Category for the given title
    - :py:meth:`get_user`:             returns a User object for the given name
    - :py:meth:`delegate`:             controls when the API or SQL is used
    """
    SERVICE_API = 1
    SERVICE_SQL = 2
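
    # A short sketch of typical use through the sitesdb helpers named in the
    # class docstring (the site name is illustrative):
    #
    #   site = wiki.get_site("enwiki")
    #   result = site.api_query(action="query", meta="userinfo")
    #   page = site.get_page("Main Page")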

    def __init__(self, name=None, project=None, lang=None, base_url=None,
                 article_path=None, script_path=None, sql=None,
                 namespaces=None, login=(None, None), cookiejar=None,
                 user_agent=None, use_https=False, assert_edit=None,
                 maxlag=None, wait_between_queries=2, logger=None,
                 search_config=None):
        """Constructor for new Site instances.

        This probably isn't necessary to call yourself unless you're building a
        Site that's not in your config and you don't want to add it - normally
        all you need is wiki.get_site(name), which creates the Site for you
        based on your config file and the sites database. We accept a bunch of
        kwargs, but the only ones you really "need" are *base_url* and
        *script_path*; this is enough to figure out an API url. *login*, a
        tuple of (username, password), is highly recommended. *cookiejar* will
        be used to store cookies, and we'll use a normal CookieJar if none is
        given.

        First, we'll store the given arguments as attributes, then set up our
        URL opener. We'll load any of the attributes that weren't given from
        the API, and then log in if a username/pass was given and we aren't
        already logged in.
        """
        # Attributes referring to site information, filled in by an API query
        # if they are missing (and an API url can be determined):
        self._name = name
        self._project = project
        self._lang = lang
        self._base_url = base_url
        self._article_path = article_path
        self._script_path = script_path
        self._namespaces = namespaces

        # Attributes used for API queries:
        self._use_https = use_https
        self._assert_edit = assert_edit
        self._maxlag = maxlag
        self._wait_between_queries = wait_between_queries
        self._max_retries = 6
        self._last_query_time = 0
        self._api_lock = Lock()
        self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

        # Attributes used for SQL queries:
        if sql:
            self._sql_data = sql
        else:
            self._sql_data = {}
        self._sql_conn = None
        self._sql_lock = Lock()
        self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

        # Attribute used in copyright violation checks (see CopyrightMixIn):
        if search_config:
            self._search_config = search_config
        else:
            self._search_config = {}

        # Set up cookiejar and URL opener for making API queries:
        if cookiejar is not None:
            self._cookiejar = cookiejar
        else:
            self._cookiejar = CookieJar()
        if not user_agent:
            user_agent = constants.USER_AGENT  # Set default UA
        self._opener = build_opener(HTTPCookieProcessor(self._cookiejar))
        self._opener.addheaders = [("User-Agent", user_agent),
                                   ("Accept-Encoding", "gzip")]

        # Set up our internal logger:
        if logger:
            self._logger = logger
        else:  # Just set up a null logger to eat up our messages:
            self._logger = getLogger("earwigbot.wiki")
            self._logger.addHandler(NullHandler())

        # Get all of the above attributes that were not specified as arguments:
        self._load_attributes()

        # If we have a name/pass and the API says we're not logged in, log in:
        self._login_info = name, password = login
        if name and password:
            logged_in_as = self._get_username_from_cookies()
            if not logged_in_as or name.replace("_", " ") != logged_in_as:
                self._login(login)

    def __repr__(self):
        """Return the canonical string representation of the Site."""
        res = ", ".join((
            "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
            "base_url={_base_url!r}", "article_path={_article_path!r}",
            "script_path={_script_path!r}", "use_https={_use_https!r}",
            "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
            "sql={_sql_data!r}", "login={0}", "user_agent={2!r}",
            "cookiejar={1})"))
        name, password = self._login_info
        login = "******".format(repr(name), "hidden" if password else None)
        cookies = self._cookiejar.__class__.__name__
        if hasattr(self._cookiejar, "filename"):
            cookies += "({0!r})".format(getattr(self._cookiejar, "filename"))
        else:
            cookies += "()"
        agent = self._opener.addheaders[0][1]
        return res.format(login, cookies, agent, **self.__dict__)

    def __str__(self):
        """Return a nice string representation of the Site."""
        res = "<Site {0} ({1}:{2}) at {3}>"
        return res.format(self.name, self.project, self.lang, self.domain)

    def _unicodeify(self, value, encoding="utf8"):
        """Return input as unicode if it's not unicode to begin with."""
        if isinstance(value, unicode):
            return value
        return unicode(value, encoding)

    def _urlencode_utf8(self, params):
        """Implement urllib.urlencode() with support for unicode input."""
        enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
        args = []
        for key, val in params.iteritems():
            key = quote_plus(enc(key))
            val = quote_plus(enc(val))
            args.append(key + "=" + val)
        return "&".join(args)

    def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False):
        """Do an API query with *params* as a dict of parameters.

        See the documentation for :py:meth:`api_query` for full implementation
        details.
        """
        since_last_query = time() - self._last_query_time  # Throttling support
        if since_last_query < self._wait_between_queries:
            wait_time = self._wait_between_queries - since_last_query
            log = "Throttled: waiting {0} seconds".format(round(wait_time, 2))
            self._logger.debug(log)
            sleep(wait_time)
        self._last_query_time = time()

        url, data = self._build_api_query(params, ignore_maxlag)
        if "lgpassword" in params:
            self._logger.debug("{0} -> <hidden>".format(url))
        else:
            self._logger.debug("{0} -> {1}".format(url, data))

        try:
            response = self._opener.open(url, data)
        except URLError as error:
            if hasattr(error, "reason"):
                e = "API query failed: {0}.".format(error.reason)
            elif hasattr(error, "code"):
                e = "API query failed: got an error code of {0}."
                e = e.format(error.code)
            else:
                e = "API query failed."
            raise exceptions.APIError(e)

        result = response.read()
        if response.headers.get("Content-Encoding") == "gzip":
            stream = StringIO(result)
            gzipper = GzipFile(fileobj=stream)
            result = gzipper.read()

        return self._handle_api_query_result(result, params, tries, wait)

    def _build_api_query(self, params, ignore_maxlag):
        """Given API query params, return the URL to query and POST data."""
        if not self._base_url or self._script_path is None:
            e = "Tried to do an API query, but no API URL is known."
            raise exceptions.APIError(e)

        url = ''.join((self.url, self._script_path, "/api.php"))
        params["format"] = "json"  # This is the only format we understand
        if self._assert_edit:  # If requested, ensure that we're logged in
            params["assert"] = self._assert_edit
        if self._maxlag and not ignore_maxlag:
            # If requested, don't overload the servers:
            params["maxlag"] = self._maxlag

        data = self._urlencode_utf8(params)
        return url, data

    def _handle_api_query_result(self, result, params, tries, wait):
        """Given the result of an API query, attempt to return useful data."""
        try:
            res = loads(result)  # Try to parse as a JSON object
        except ValueError:
            e = "API query failed: JSON could not be decoded."
            raise exceptions.APIError(e)

        try:
            code = res["error"]["code"]
            info = res["error"]["info"]
        except (TypeError, KeyError):  # Having these keys indicates a problem
            return res  # All is well; return the decoded JSON

        if code == "maxlag":  # We've been throttled by the server
            if tries >= self._max_retries:
                e = "Maximum number of retries reached ({0})."
                raise exceptions.APIError(e.format(self._max_retries))
            tries += 1
            msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
            self._logger.info(msg.format(info, wait, tries, self._max_retries))
            sleep(wait)
            return self._api_query(params, tries=tries, wait=wait*2)
        else:  # Some unknown error occurred
            e = 'API query failed: got error "{0}"; server says: "{1}".'
            error = exceptions.APIError(e.format(code, info))
            error.code, error.info = code, info
            raise error
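
    # Retry behaviour sketch: with the default wait of 5 seconds and
    # _max_retries = 6, maxlag errors back off through sleeps of 5, 10, 20,
    # 40, 80 and 160 seconds before APIError is finally raised.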

    def _load_attributes(self, force=False):
        """Load data about our Site from the API.

        This function is called by __init__() when one of the site attributes
        was not given as a keyword argument. We'll do an API query to get the
        missing data, but only if there actually *is* missing data.

        Additionally, you can call this with *force* set to True to forcibly
        reload all attributes.
        """
        # All attributes to be loaded, except _namespaces, which is a special
        # case because it requires additional params in the API query:
        attrs = [self._name, self._project, self._lang, self._base_url,
            self._article_path, self._script_path]

        params = {"action": "query", "meta": "siteinfo", "siprop": "general"}

        if not self._namespaces or force:
            params["siprop"] += "|namespaces|namespacealiases"
            result = self.api_query(**params)
            self._load_namespaces(result)
        elif all(attrs):  # Everything is already specified and we're not told
            return        # to force a reload, so do nothing
        else:  # We're only loading attributes other than _namespaces
            result = self.api_query(**params)

        res = result["query"]["general"]
        self._name = res["wikiid"]
        self._project = res["sitename"].lower()
        self._lang = res["lang"]
        self._base_url = res["server"]
        self._article_path = res["articlepath"]
        self._script_path = res["scriptpath"]

    def _load_namespaces(self, result):
        """Fill self._namespaces with a dict of namespace IDs and names.

        Called by _load_attributes() with API data as *result* when
        self._namespaces was not given as a kwarg to __init__().
        """
        self._namespaces = {}

        for namespace in result["query"]["namespaces"].values():
            ns_id = namespace["id"]
            name = namespace["*"]
            try:
                canonical = namespace["canonical"]
            except KeyError:
                self._namespaces[ns_id] = [name]
            else:
                if name != canonical:
                    self._namespaces[ns_id] = [name, canonical]
                else:
                    self._namespaces[ns_id] = [name]

        for namespace in result["query"]["namespacealiases"]:
            ns_id = namespace["id"]
            alias = namespace["*"]
            self._namespaces[ns_id].append(alias)
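
    # Illustrative shape of the resulting mapping (enwiki-like values):
    #
    #   self._namespaces = {
    #       0: [u""],                              # main; no canonical name
    #       1: [u"Talk"],
    #       4: [u"Wikipedia", u"Project", u"WP"],  # local, canonical, alias
    #   }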

    def _get_cookie(self, name, domain):
        """Return the named cookie unless it is expired or doesn't exist."""
        for cookie in self._cookiejar:
            if cookie.name == name and cookie.domain == domain:
                if cookie.is_expired():
                    break
                return cookie

    def _get_username_from_cookies(self):
        """Try to return our username based solely on cookies.

        First, we'll look for a cookie named self._name + "Token", like
        "enwikiToken". If it exists and isn't expired, we'll assume it's valid
        and try to return the value of the cookie self._name + "UserName" (like
        "enwikiUserName"). This should work fine on wikis without single-user
        login.

        If `enwikiToken` doesn't exist, we'll try to find a cookie named
        `centralauth_Token`. If this exists and is not expired, we'll try to
        return the value of `centralauth_User`.

        If we didn't get any matches, we'll return None. Our goal here isn't to
        return the most likely username, or what we *want* our username to be
        (for that, we'd do self._login_info[0]), but rather to get our current
        username without an unnecessary ?action=query&meta=userinfo API query.
        """
        name = ''.join((self._name, "Token"))
        cookie = self._get_cookie(name, self.domain)

        if cookie:
            name = ''.join((self._name, "UserName"))
            user_name = self._get_cookie(name, self.domain)
            if user_name:
                return unquote_plus(user_name.value)

        for cookie in self._cookiejar:
            if cookie.name != "centralauth_Token" or cookie.is_expired():
                continue
            base = cookie.domain
            if base.startswith(".") and not cookie.domain_initial_dot:
                base = base[1:]
            if self.domain.endswith(base):
                user_name = self._get_cookie("centralauth_User", cookie.domain)
                if user_name:
                    return unquote_plus(user_name.value)

    def _get_username_from_api(self):
        """Do a simple API query to get our username and return it.

        This is a reliable way to make sure we are actually logged in, because
        it doesn't deal with annoying cookie logic, but it results in an API
        query that is unnecessary in some cases.

        Called by _get_username() (in turn called by get_user() with no
        username argument) when cookie lookup fails, probably indicating that
        we are logged out.
        """
        result = self.api_query(action="query", meta="userinfo")
        return result["query"]["userinfo"]["name"]

    def _get_username(self):
        """Return the name of the current user, whether logged in or not.

        First, we'll try to deduce it solely from cookies, to avoid an
        unnecessary API query. For the cookie-detection method, see
        _get_username_from_cookies()'s docs.

        If our username isn't in cookies, then we're probably not logged in, or
        something fishy is going on (like forced logout). In this case, do a
        single API query for our username (or IP address) and return that.
        """
        name = self._get_username_from_cookies()
        if name:
            return name
        return self._get_username_from_api()

    def _save_cookiejar(self):
        """Try to save our cookiejar after doing a (normal) login or logout.

        Calls the standard .save() method with no filename. Don't fret if our
        cookiejar doesn't support saving (CookieJar raises AttributeError,
        FileCookieJar raises NotImplementedError) or no default filename was
        given (LWPCookieJar and MozillaCookieJar raise ValueError).
        """
        if hasattr(self._cookiejar, "save"):
            try:
                self._cookiejar.save()
            except (NotImplementedError, ValueError):
                pass
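
    # A hedged illustration of a cookiejar _save_cookiejar() can persist:
    # cookielib's LWPCookieJar implements .save(), and constructing it with a
    # default filename avoids the ValueError mentioned above. The filename
    # here is purely illustrative.
    #
    #     from cookielib import LWPCookieJar
    #     jar = LWPCookieJar("/tmp/site_cookies.lwp")
    #     jar.save()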

    def _login(self, login, token=None, attempt=0):
        """Safely login through the API.

        Normally, this is called by __init__() if a username and password have
        been provided and no valid login cookies were found. The only other
        time it needs to be called is when those cookies expire, which is done
        automatically by api_query() if a query fails.

        Recent versions of MediaWiki's API have fixed a CSRF vulnerability,
        requiring login to be done in two separate requests. If the response
        from our initial request is "NeedToken", we'll do another one with
        the token. If login is successful, we'll try to save our cookiejar.

        Raises LoginError on login errors (duh), like bad passwords and
        nonexistent usernames.

        *login* is a (username, password) tuple. *token* is the token returned
        from our first request, and *attempt* is to prevent getting stuck in a
        loop if MediaWiki isn't acting right.
        """
        name, password = login
        if token:
            result = self.api_query(action="login", lgname=name,
                                    lgpassword=password, lgtoken=token)
        else:
            result = self.api_query(action="login", lgname=name,
                                    lgpassword=password)

        res = result["login"]["result"]
        if res == "Success":
            self._save_cookiejar()
        elif res == "NeedToken" and attempt == 0:
            token = result["login"]["token"]
            return self._login(login, token, attempt=1)
        else:
            if res == "Illegal":
                e = "The provided username is illegal."
            elif res == "NotExists":
                e = "The provided username does not exist."
            elif res == "EmptyPass":
                e = "No password was given."
            elif res == "WrongPass" or res == "WrongPluginPass":
                e = "The given password is incorrect."
            else:
                e = "Couldn't login; server says '{0}'.".format(res)
            raise exceptions.LoginError(e)

    def _logout(self):
        """Safely logout through the API.

        We'll do a simple API request (api.php?action=logout), clear our
        cookiejar (which probably contains now-invalidated cookies) and try to
        save it, if it supports that sort of thing.
        """
        self.api_query(action="logout")
        self._cookiejar.clear()
        self._save_cookiejar()

    def _sql_connect(self, **kwargs):
        """Attempt to establish a connection with this site's SQL database.

        oursql.connect() will be called with self._sql_data as its kwargs.
        Any kwargs given to this function will be passed to connect() and will
        have precedence over the config file.

        Will raise SQLError() if the module "oursql" is not available. oursql
        may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
        establish a connection.
        """
        if not oursql:
            e = "Module 'oursql' is required for SQL queries."
            raise exceptions.SQLError(e)

        args = self._sql_data
        for key, value in kwargs.iteritems():
            args[key] = value

        if "read_default_file" not in args and "user" not in args and "passwd" not in args:
            args["read_default_file"] = expanduser("~/.my.cnf")

        if "autoping" not in args:
            args["autoping"] = True

        if "autoreconnect" not in args:
            args["autoreconnect"] = True

        self._sql_conn = oursql.connect(**args)

    def _get_service_order(self):
        """Return a preferred order for using services (e.g. the API and SQL).

        A list is returned, starting with the most preferred service first and
        ending with the least preferred one. Currently, there are only two
        services. SERVICE_API will always be included since the API is expected
        to be always usable. In normal circumstances, self.SERVICE_SQL will be
        first (with the API second), since using SQL directly is easier on the
        servers than making web queries with the API. self.SERVICE_SQL will be
        second if replag is greater than five minutes (a cached value refreshed
        at most once every two minutes), *unless* API lag is also very high.
        self.SERVICE_SQL will not be included in the list if we cannot form a
        proper SQL connection.
        """
        now = time()
        if now - self._sql_info_cache["lastcheck"] > 120:
            self._sql_info_cache["lastcheck"] = now
            try:
                self._sql_info_cache["replag"] = sqllag = self.get_replag()
            except (exceptions.SQLError, oursql.Error):
                self._sql_info_cache["usable"] = False
                return [self.SERVICE_API]
            self._sql_info_cache["usable"] = True
        else:
            if not self._sql_info_cache["usable"]:
                return [self.SERVICE_API]
            sqllag = self._sql_info_cache["replag"]

        if sqllag > 300:
            if not self._maxlag:
                return [self.SERVICE_API, self.SERVICE_SQL]
            if now - self._api_info_cache["lastcheck"] > 300:
                self._api_info_cache["lastcheck"] = now
                try:
                    self._api_info_cache["maxlag"] = apilag = self.get_maxlag()
                except exceptions.APIError:
                    self._api_info_cache["maxlag"] = apilag = 0
            else:
                apilag = self._api_info_cache["maxlag"]
            if apilag > self._maxlag:
                return [self.SERVICE_SQL, self.SERVICE_API]
            return [self.SERVICE_API, self.SERVICE_SQL]

        return [self.SERVICE_SQL, self.SERVICE_API]

    @property
    def name(self):
        """The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
        return self._name

    @property
    def project(self):
        """The Site's project name in lowercase, like ``"wikipedia"``."""
        return self._project

    @property
    def lang(self):
        """The Site's language code, like ``"en"`` or ``"es"``."""
        return self._lang

    @property
    def domain(self):
        """The Site's web domain, like ``"en.wikipedia.org"``."""
        return urlparse(self._base_url).netloc

    @property
    def url(self):
        """The Site's full base URL, like ``"https://en.wikipedia.org"``."""
        url = self._base_url
        if url.startswith("//"):  # Protocol-relative URLs from 1.18
            if self._use_https:
                url = "https:" + url
            else:
                url = "http:" + url
        return url

    def api_query(self, **kwargs):
        """Do an API query with `kwargs` as the parameters.

        This will first attempt to construct an API url from
        :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
        both of these, or else we'll raise
        :py:exc:`~earwigbot.exceptions.APIError`. If
        :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
        1.18), we'll choose HTTPS only if :py:attr:`self._use_https` is
        ``True``, otherwise HTTP.

        We'll encode the given params, adding ``format=json`` along the way, as
        well as ``&assert=`` and ``&maxlag=`` based on
        :py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively.
        Additionally, we'll sleep a bit if the last query was made fewer than
        :py:attr:`self._wait_between_queries` seconds ago. The request is made
        through :py:attr:`self._opener`, which has cookie support
        (:py:attr:`self._cookiejar`), a ``User-Agent``
        (:py:const:`earwigbot.wiki.constants.USER_AGENT`), and
        ``Accept-Encoding`` set to ``"gzip"``.

        Assuming everything went well, we'll gunzip the data (if compressed),
        load it as a JSON object, and return it.

        If our request failed for some reason, we'll raise
        :py:exc:`~earwigbot.exceptions.APIError` with details. If that
        reason was due to maxlag, we'll sleep for a bit and then repeat the
        query until we exceed :py:attr:`self._max_retries`.

        There is helpful MediaWiki API documentation at `MediaWiki.org
        <http://www.mediawiki.org/wiki/API>`_.
        """
        with self._api_lock:
            return self._api_query(kwargs)
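
    # Example (a sketch; assumes a fully configured Site instance):
    #
    #     result = site.api_query(action="query", meta="siteinfo",
    #                             siprop="general")
    #     print result["query"]["general"]["sitename"]
    #
    # format=json, assert=, and maxlag= are appended automatically, as the
    # docstring above describes.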

    def sql_query(self, query, params=(), plain_query=False, dict_cursor=False,
                  cursor_class=None, show_table=False):
        """Do an SQL query and yield its results.

        If *plain_query* is ``True``, we will force an unparameterized query.
        Specifying both *params* and *plain_query* will cause an error. If
        *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
        our cursor, otherwise the default :py:class:`oursql.Cursor`. If
        *cursor_class* is given, it will override this option. If *show_table*
        is True, the name of the table will be prepended to the name of the
        column. This will mainly affect an :py:class:`~oursql.DictCursor`.

        Example usage::

            >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
            >>> params = ("The Earwig",)
            >>> result1 = site.sql_query(query, params)
            >>> result2 = site.sql_query(query, params, dict_cursor=True)
            >>> for row in result1: print row
            (7418060L, '20080703215134')
            >>> for row in result2: print row
            {'user_id': 7418060L, 'user_registration': '20080703215134'}

        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
        :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
        query.

        See :py:meth:`_sql_connect` for information on how a connection is
        acquired. Also relevant is `oursql's documentation
        <http://packages.python.org/oursql>`_ for details on that package.
        """
        if not cursor_class:
            if dict_cursor:
                cursor_class = oursql.DictCursor
            else:
                cursor_class = oursql.Cursor
        klass = cursor_class

        with self._sql_lock:
            if not self._sql_conn:
                self._sql_connect()
            with self._sql_conn.cursor(klass, show_table=show_table) as cur:
                cur.execute(query, params, plain_query)
                for result in cur:
                    yield result

    def get_maxlag(self, showall=False):
        """Return the internal database replication lag in seconds.

        In a typical setup, this function returns the replication lag *within*
        the WMF's cluster, *not* external replication lag affecting the
        Toolserver (see :py:meth:`get_replag` for that). This is useful when
        combined with the ``maxlag`` API query param (added by config), with
        which queries will be halted and retried if the lag is too high,
        usually above five seconds.

        With *showall*, will return a list of the lag for all servers in the
        cluster, not just the one with the highest lag.
        """
        params = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"}
        if showall:
            params["sishowalldb"] = 1
        with self._api_lock:
            result = self._api_query(params, ignore_maxlag=True)
        if showall:
            return [server["lag"] for server in result["query"]["dbrepllag"]]
        return result["query"]["dbrepllag"][0]["lag"]

    def get_replag(self):
        """Return the estimated external database replication lag in seconds.

        Requires SQL access. This function only makes sense on a replicated
        database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
        large number of edits (ideally, at least one per second), or the result
        may be larger than expected, since it works by subtracting the current
        time from the timestamp of the latest recent changes event.

        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
        :py:exc:`oursql.InterfaceError`, ...) if there were problems.
        """
        query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
                   recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
        result = list(self.sql_query(query))
        return result[0][0]

    def namespace_id_to_name(self, ns_id, all=False):
        """Given a namespace ID, returns associated namespace names.

        If *all* is ``False`` (default), we'll return the first name in the
        list, which is usually the localized version. Otherwise, we'll return
        the entire list, which includes the canonical name. For example, this
        returns ``u"Wikipedia"`` if *ns_id* = ``4`` and *all* is ``False`` on
        ``enwiki``; returns ``[u"Wikipedia", u"Project", u"WP"]`` if *ns_id* =
        ``4`` and *all* is ``True``.

        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
        is not found.
        """
        try:
            if all:
                return self._namespaces[ns_id]
            else:
                return self._namespaces[ns_id][0]
        except KeyError:
            e = "There is no namespace with id {0}.".format(ns_id)
            raise exceptions.NamespaceNotFoundError(e)

    def namespace_name_to_id(self, name):
        """Given a namespace name, returns the associated ID.

        Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored,
        because namespaces are assumed to be case-insensitive.

        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
        name is not found.
        """
        lname = name.lower()
        for ns_id, names in self._namespaces.items():
            lnames = [n.lower() for n in names]  # Be case-insensitive
            if lname in lnames:
                return ns_id

        e = "There is no namespace with name '{0}'.".format(name)
        raise exceptions.NamespaceNotFoundError(e)
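
    # Example (a sketch, using the enwiki values quoted in
    # namespace_id_to_name()'s docstring, where namespace 4 is
    # [u"Wikipedia", u"Project", u"WP"]):
    #
    #     site.namespace_name_to_id("wp")         # -> 4
    #     site.namespace_name_to_id("Wikipedia")  # -> 4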

    def get_page(self, title, follow_redirects=False, pageid=None):
        """Return a :py:class:`Page` object for the given title.

        *follow_redirects* is passed directly to
        :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
        return a :py:class:`~earwigbot.wiki.category.Category` object instead
        if the given title is in the category namespace. As
        :py:class:`~earwigbot.wiki.category.Category` is a subclass of
        :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

        Note that this doesn't do any direct checks for existence or
        redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
        provide that.
        """
        title = self._unicodeify(title)
        prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
        prefix = title.split(":", 1)[0]
        if prefix != title:  # Avoid a page that is simply "Category"
            if prefix in prefixes:
                return Category(self, title, follow_redirects, pageid,
                                self._logger)
        return Page(self, title, follow_redirects, pageid, self._logger)

    def get_category(self, catname, follow_redirects=False, pageid=None):
        """Return a :py:class:`Category` object for the given category name.

        *catname* should be given *without* a namespace prefix. This method is
        really just shorthand for :py:meth:`get_page("Category:" + catname)
        <get_page>`.
        """
        catname = self._unicodeify(catname)
        prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
        pagename = u':'.join((prefix, catname))
        return Category(self, pagename, follow_redirects, pageid, self._logger)

    def get_user(self, username=None):
        """Return a :py:class:`User` object for the given username.

        If *username* is left as ``None``, then a
        :py:class:`~earwigbot.wiki.user.User` object representing the currently
        logged-in (or anonymous!) user is returned.
        """
        if username:
            username = self._unicodeify(username)
        else:
            username = self._get_username()
        return User(self, username, self._logger)

    def delegate(self, services, args=None, kwargs=None):
        """Delegate a task to either the API or SQL depending on conditions.

        *services* should be a dictionary in which the key is the service name
        (:py:attr:`self.SERVICE_API <SERVICE_API>` or
        :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`), and the value is the
        function to call for this service. All functions will be passed the
        same arguments: the tuple *args* and the dict *kwargs*, both of which
        are empty by default. The service order is determined by
        :py:meth:`_get_service_order`.

        Not every service needs an entry in the dictionary. Will raise
        :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate
        service cannot be found.
        """
        if not args:
            args = ()
        if not kwargs:
            kwargs = {}

        order = self._get_service_order()
        for srv in order:
            if srv in services:
                try:
                    return services[srv](*args, **kwargs)
                except exceptions.ServiceError:
                    continue
        raise exceptions.NoServiceError(services)
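
A minimal sketch of how delegate() is meant to be used; the two counting
functions and the `site` instance here are hypothetical, not part of the
class:

    def count_pages_api():
        result = site.api_query(action="query", meta="siteinfo",
                                siprop="statistics")
        return result["query"]["statistics"]["pages"]

    def count_pages_sql():
        rows = list(site.sql_query("SELECT COUNT(*) FROM page"))
        return rows[0][0]

    services = {site.SERVICE_API: count_pages_api,
                site.SERVICE_SQL: count_pages_sql}
    pages = site.delegate(services)

delegate() tries the functions in the order given by _get_service_order(),
falling back to the next service when one raises ServiceError.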
Example n. 22
0
class HLSFetcher(object):

	def __init__(self, url, **options):

		self.program = options.get('program',1)
		self.hls_headers = options.get('headers',{})
		self.path = options.get('path',None)
		self.bitrate = options.get('bitrate',200000)
		self.nbuffer = options.get('buffer',3)
		self.n_segments_keep = options.get('keep',self.nbuffer+1)
		url = urllib.unquote(url)
		self.puser = options.get('puser')
		self.ppass = options.get('ppass')
		self.purl = options.get('purl')

		us = url.split('|')
		if len(us) > 1:
			self.url = us[0]
			for hd in us[1:]:
				self.hls_headers.update(dict(urlparse.parse_qsl(hd.strip())))
		else:
			self.url = url

		self.agent = self.hls_headers.pop('User-Agent', 'Enigma2 Mediaplayer')
		if not self.path:
			self.path = tempfile.mkdtemp()

		self._program_playlist = None
		self._file_playlist = None
		self._cookies = CookieJar()
		self._cached_files = {} 	# sequence n -> path
		self._run = True
		self._poolHelper = TwHTTP11PoolHelper(retryAutomatically=True)

		self._files = None 			# the iter of the playlist files download
		self._next_download = None 	# the delayed download defer, if any
		self._file_playlisted = None # the defer to wait until new files are added to playlist
		self._new_filed = None
		self._seg_task = None

	def _get_page(self, url):
		url = url.encode("utf-8")
		if 'HLS_RESET_COOKIES' in os.environ.keys():
			self._cookies.clear()

		timeout = 10
		return twAgentGetPage(url, agent=self.agent, cookieJar=self._cookies, headers=self.hls_headers, timeout=timeout, pool=self._poolHelper._pool, proxy_url=self.purl, p_user=self.puser, p_pass=self.ppass)

	def _download_page(self, url, path, file):
		def _decrypt(data):
			def num_to_iv(n):
				iv = struct.pack(">8xq", n)
				return b"\x00" * (16 - len(iv)) + iv

			if not self._file_playlist._iv:
				iv = num_to_iv(file['sequence'])
				aes = AES.new(self._file_playlist._key, AES.MODE_CBC, iv)
			else:
				aes = AES.new(self._file_playlist._key, AES.MODE_CBC, self._file_playlist._iv)
			return aes.decrypt(data)

		d = self._get_page(url)
		if self._file_playlist._key:
			d.addCallback(_decrypt)
		return d
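
	# Note on _decrypt() above: for AES-128 HLS segments with no explicit IV
	# in the playlist, the IV is the media sequence number as a big-endian,
	# zero-padded 16-byte value. Illustration (hedged):
	#
	#     num_to_iv(7) == b"\x00" * 15 + b"\x07"
	#
	# struct.pack(">8xq", n) already yields 16 bytes (8 pad bytes plus an
	# 8-byte signed long long), so the extra zero-padding is only a safeguard.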

	def _download_segment(self, f):
		url = make_url(self._file_playlist.url, f['file'])
		name = 'seg_' + next(tempfile._get_candidate_names())
		path = os.path.join(self.path, name)
		d = self._download_page(url, path, f)
		if self.n_segments_keep != 0:
			file = open(path, 'wb')
			d.addCallback(lambda x: file.write(x))
			d.addBoth(lambda _: file.close())
			d.addCallback(lambda _: path)
			d.addErrback(self._got_file_failed)
			d.addCallback(self._got_file, url, f)
		else:
			d.addCallback(lambda _: (None, path, f))
		return d

	def delete_cache(self, f):
		bgFileEraser = eBackgroundFileEraser.getInstance()
		keys = self._cached_files.keys()
		for i in ifilter(f, keys):
			filename = self._cached_files[i]
			bgFileEraser.erase(str(filename))
			del self._cached_files[i]

	def delete_all_cache(self):
		bgFileEraser = eBackgroundFileEraser.getInstance()
		for path in self._cached_files.itervalues():
			bgFileEraser.erase(str(path))
		self._cached_files.clear()

	def _got_file_failed(self, e):
		if self._new_filed:
			self._new_filed.errback(e)
			self._new_filed = None

	def _got_file(self, path, url, f):
		self._cached_files[f['sequence']] = path
		if self.n_segments_keep != -1:
			self.delete_cache(lambda x: x <= f['sequence'] - self.n_segments_keep)
		if self._new_filed:
			self._new_filed.callback((path, url, f))
			self._new_filed = None
		return (path, url, f)

	def _get_next_file(self):
		next = self._files.next()
		if next:
			return self._download_segment(next)
		elif not self._file_playlist.endlist():
			self._seg_task.stop()
			self._file_playlisted = defer.Deferred()
			self._file_playlisted.addCallback(lambda x: self._get_next_file())
			self._file_playlisted.addCallback(self._next_file_delay)
			self._file_playlisted.addCallback(self._seg_task.start)
			return self._file_playlisted

	def _handle_end(self, failure):
		failure.trap(StopIteration)
		print "End of media"

	def _next_file_delay(self, f):
		if f is None: return 0
		delay = f[2]["duration"]
		if self.nbuffer > 0:
			for i in range(0,self.nbuffer):
				if self._cached_files.has_key(f[2]['sequence'] - i):
					return delay
			delay = 0
		elif self._file_playlist.endlist():
			delay = 1
		return delay

	def _get_files_loop(self, res=None):
		if not self._seg_task:
			self._seg_task = task.LoopingCall(self._get_next_file)
		d = self._get_next_file()
		if d != None:
			self._seg_task.stop()
			d.addCallback(self._next_file_delay)
			d.addCallback(self._seg_task.start)
			d.addErrback(self._handle_end)

	def _playlist_updated(self, pl):
		if pl and pl.has_programs():
			# if we got a program playlist, save it and start a program
			self._program_playlist = pl
			(program_url, _) = pl.get_program_playlist(self.program, self.bitrate)
			return self._reload_playlist(M3U8(program_url, self._cookies, self.hls_headers))
		elif pl and pl.has_files():
			# we got sequence playlist, start reloading it regularly, and get files
			self._file_playlist = pl
			if not self._files:
				self._files = pl.iter_files()
			if not pl.endlist():
				reactor.callLater(pl.reload_delay(), self._reload_playlist, pl)
			if self._file_playlisted:
				self._file_playlisted.callback(pl)
				self._file_playlisted = None
		else:
			raise Exception('Playlist has no valid content.')
		return pl

	def _got_playlist_content(self, content, pl):
		if not pl.update(content) and self._run:
			# if the playlist cannot be loaded, start a reload timer
			d = deferLater(reactor, pl.reload_delay(), self._fetch_playlist, pl)
			d.addCallback(self._got_playlist_content, pl)
			return d
		return pl

	def _fetch_playlist(self, pl):
		d = self._get_page(pl.url)
		return d

	def _reload_playlist(self, pl):
		if self._run:
			d = self._fetch_playlist(pl)
			d.addCallback(self._got_playlist_content, pl)
			d.addCallback(self._playlist_updated)
			return d
		else:
			return None

	def get_file(self, sequence):
		d = defer.Deferred()
		keys = self._cached_files.keys()
		try:
			endlist = sequence == self._file_playlist._end_sequence
			sequence = ifilter(lambda x: x >= sequence, keys).next()
			filename = self._cached_files[sequence]
			d.callback((filename, endlist))
		except:
			d.addCallback(lambda x: self.get_file(sequence))
			self._new_filed = d
			keys.sort()
		return d

	def _start_get_files(self, x):
		self._new_filed = defer.Deferred()
		self._get_files_loop()
		return self._new_filed

	def start(self):
		if self._run:
			self._files = None
			d = self._reload_playlist(M3U8(self.url, self._cookies, self.hls_headers))
			d.addCallback(self._start_get_files)
			return d

	def stop(self):
		self._run = False
		self._poolHelper.close()
		if self._seg_task is not None:
			self._seg_task.stop()
		if self._new_filed is not None:
			self._new_filed.cancel()
		reactor.callLater(1, self.delete_all_cache)
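
A hedged usage sketch for the fetcher above, run under a Twisted reactor;
the stream URL is a placeholder, and the helpers it depends on (M3U8,
TwHTTP11PoolHelper, twAgentGetPage) must already be importable:

	from twisted.internet import reactor

	fetcher = HLSFetcher("http://example.com/stream.m3u8", bitrate=200000, buffer=3)
	d = fetcher.start()
	# start() fires with the (path, url, f) tuple of the first cached segment
	d.addCallback(lambda result: fetcher.get_file(result[2]['sequence']))
	d.addErrback(lambda failure: fetcher.stop())
	reactor.run()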
Example n. 23
0
class Bugz:
    """ Converts sane method calls to Bugzilla HTTP requests.

	@ivar base: base url of bugzilla.
	@ivar user: username for authenticated operations.
	@ivar password: password for authenticated operations
	@ivar cookiejar: for authenticated sessions so we only auth once.
	@ivar forget: forget user/password after session.
	@ivar authenticated: is this session authenticated already
	"""

    def __init__(self, base, user=None, password=None, forget=False, skip_auth=False, httpuser=None, httppassword=None):
        """
		{user} and {password} will be prompted if an action needs them
		and they are not supplied.

		if {forget} is set, the login cookie will be destroyed on quit.

		@param base: base url of the bugzilla
		@type  base: string
		@keyword user: username for authenticated actions.
		@type    user: string
		@keyword password: password for authenticated actions.
		@type    password: string
		@keyword forget: forget login session after termination.
		@type    forget: bool
		@keyword skip_auth: do not authenticate
		@type    skip_auth: bool
		"""
        self.base = base
        scheme, self.host, self.path, query, frag = urlsplit(self.base)
        self.authenticated = False
        self.forget = forget

        if not self.forget:
            try:
                cookie_file = os.path.join(os.environ["HOME"], COOKIE_FILE)
                self.cookiejar = LWPCookieJar(cookie_file)
                if forget:
                    try:
                        self.cookiejar.load()
                        self.cookiejar.clear()
                        self.cookiejar.save()
                        os.chmod(self.cookiejar.filename, 0600)
                    except IOError:
                        pass
            except KeyError:
                self.warn("Unable to save session cookies in %s" % cookie_file)
                self.cookiejar = CookieJar()  # CookieJar takes no filename; fall back to in-memory
        else:
            self.cookiejar = CookieJar()

        self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
        self.user = user
        self.password = password
        self.httpuser = httpuser
        self.httppassword = httppassword
        self.skip_auth = skip_auth

    def log(self, status_msg):
        """Default logging handler. Expected to be overridden by
		the UI implementing subclass.

		@param status_msg: status message to print
		@type  status_msg: string
		"""
        return

    def warn(self, warn_msg):
        """Default logging handler. Expected to be overridden by
		the UI implementing subclass.

		@param warn_msg: warning message to print
		@type  warn_msg: string
		"""
        return

    def get_input(self, prompt):
        """Default input handler. Expected to be override by the
		UI implementing subclass.

		@param prompt: Prompt message
		@type  prompt: string
		"""
        return ""

    def auth(self):
        """Authenticate a session.
		"""
        # check if we need to authenticate
        if self.authenticated:
            return

        # try seeing if we really need to request login
        if not self.forget:
            try:
                self.cookiejar.load()
            except IOError:
                pass

        req_url = urljoin(self.base, config.urls["auth"])
        req_url += "?GoAheadAndLogIn=1"
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        re_request_login = re.compile(r"<title>.*Log in to .*</title>")
        if not re_request_login.search(resp.read()):
            self.log("Already logged in.")
            self.authenticated = True
            return

        # prompt for username if we were not supplied with it
        if not self.user:
            self.log("No username given.")
            self.user = self.get_input("Username: ")

        # prompt for password if we were not supplied with it
        if not self.password:
            self.log("No password given.")
            self.password = getpass.getpass()

        # perform login
        qparams = config.params["auth"].copy()
        qparams["Bugzilla_login"] = self.user
        qparams["Bugzilla_password"] = self.password
        if not self.forget:
            qparams["Bugzilla_remember"] = "on"

        req_url = urljoin(self.base, config.urls["auth"])
        req = Request(req_url, urlencode(qparams), config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        if resp.info().has_key("Set-Cookie"):
            self.authenticated = True
            if not self.forget:
                self.cookiejar.save()
                os.chmod(self.cookiejar.filename, 0600)
            return True
        else:
            raise RuntimeError("Failed to login")

    def extractResults(self, resp):
        # parse the results into dicts.
        results = []
        columns = []
        rows = []

        for r in csv.reader(resp):
            rows.append(r)
        for field in rows[0]:
            if config.choices["column_alias"].has_key(field):
                columns.append(config.choices["column_alias"][field])
            else:
                self.log("Unknown field: " + field)
                columns.append(field)
        for row in rows[1:]:
            if "Missing Search" in row[0]:
                self.log("Bugzilla error (Missing search found)")
                return None
            fields = {}
            for i in range(min(len(row), len(columns))):
                fields[columns[i]] = row[i]
            results.append(fields)
        return results
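
    # Illustration (hedged): for a CSV response such as
    #
    #     bug_id,bug_severity
    #     12345,normal
    #
    # this returns one dict per data row, keyed by the aliased column names
    # from config.choices["column_alias"] (or by the raw field name when no
    # alias is defined).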

    def search(
        self,
        query,
        comments=False,
        order="number",
        assigned_to=None,
        reporter=None,
        cc=None,
        commenter=None,
        whiteboard=None,
        keywords=None,
        status=[],
        severity=[],
        priority=[],
        product=[],
        component=[],
    ):
        """Search bugzilla for a bug.

		@param query: query string to search in title or {comments}.
		@type  query: string
		@param order: what order to returns bugs in.
		@type  order: string

		@keyword assigned_to: email address which the bug is assigned to.
		@type    assigned_to: string
		@keyword reporter: email address matching the bug reporter.
		@type    reporter: string
		@keyword cc: email that is contained in the CC list
		@type    cc: string
		@keyword commenter: email of a commenter.
		@type    commenter: string

		@keyword whiteboard: string to search in status whiteboard (gentoo?)
		@type    whiteboard: string
		@keyword keywords: keyword to search for
		@type    keywords: string

		@keyword status: bug status to match. default is ['NEW', 'ASSIGNED',
						 'REOPENED'].
		@type    status: list
		@keyword severity: severity to match, empty means all.
		@type    severity: list
		@keyword priority: priority levels to match, empty means all.
		@type    priority: list
		@keyword comments: search comments instead of just bug title.
		@type    comments: bool
		@keyword product: search within products. empty means all.
		@type    product: list
		@keyword component: search within components. empty means all.
		@type    component: list

		@return: list of bugs, each bug represented as a dict
		@rtype: list of dicts
		"""

        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["list"].copy()
        qparams["value0-0-0"] = query
        if comments:
            qparams["type0-0-1"] = qparams["type0-0-0"]
            qparams["value0-0-1"] = query

        qparams["order"] = config.choices["order"].get(order, "Bug Number")
        qparams["bug_severity"] = severity or []
        qparams["priority"] = priority or []
        if status is None:
            # NEW, ASSIGNED, and REOPENED are obsolete as of bugzilla 3.x and
            # were removed from bugs.gentoo.org on 2011/05/01
            qparams["bug_status"] = ["NEW", "ASSIGNED", "REOPENED", "UNCONFIRMED", "CONFIRMED", "IN_PROGRESS"]
        elif [s.upper() for s in status] == ["ALL"]:
            qparams["bug_status"] = config.choices["status"]
        else:
            qparams["bug_status"] = [s.upper() for s in status]
        qparams["product"] = product or ""
        qparams["component"] = component or ""
        qparams["status_whiteboard"] = whiteboard or ""
        qparams["keywords"] = keywords or ""

        # hoops to jump through for emails, since there are
        # only two fields, we have to figure out what combinations
        # to use if all three are set.
        unique = list(set([assigned_to, cc, reporter, commenter]))
        unique = [u for u in unique if u]
        if len(unique) < 3:
            for i in range(len(unique)):
                e = unique[i]
                n = i + 1
                qparams["email%d" % n] = e
                qparams["emailassigned_to%d" % n] = int(e == assigned_to)
                qparams["emailreporter%d" % n] = int(e == reporter)
                qparams["emailcc%d" % n] = int(e == cc)
                qparams["emaillongdesc%d" % n] = int(e == commenter)
        else:
            raise AssertionError("Cannot set assigned_to, cc, and reporter in the same query")

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["list"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)
        return self.extractResults(resp)
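
    # Example call (a sketch; the URL and product are placeholders):
    #
    #     bugz = Bugz("https://bugs.example.org/")
    #     bugs = bugz.search("segfault", status=["NEW", "ASSIGNED"],
    #                        product=["MyProduct"])
    #     for bug in bugs or []:
    #         print bug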

    def namedcmd(self, cmd):
        """Run command stored in Bugzilla by name.

		@return: Result from the stored command.
		@rtype: list of dicts
		"""

        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["namedcmd"].copy()
        # Is there a better way of getting a command with a space in its name
        # to be encoded as foo%20bar instead of foo+bar or foo%2520bar?
        qparams["namedcmd"] = quote(cmd)
        req_params = urlencode(qparams, True)
        req_params = req_params.replace("%25", "%")

        req_url = urljoin(self.base, config.urls["list"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.user and self.password:
            base64string = base64.encodestring("%s:%s" % (self.user, self.password))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        return self.extractResults(resp)

    def get(self, bugid):
        """Get an ElementTree representation of a bug.

		@param bugid: bug id
		@type  bugid: int

		@rtype: ElementTree
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["show"].copy()
        qparams["id"] = bugid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["show"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        data = resp.read()
        # Get rid of control characters.
        data = re.sub("[\x00-\x08\x0e-\x1f\x0b\x0c]", "", data)
        fd = StringIO(data)

        # workaround for ill-defined XML templates in bugzilla 2.20.2
        (major_version, minor_version) = (sys.version_info[0], sys.version_info[1])
        if major_version > 2 or (major_version == 2 and minor_version >= 7):
            # If this is 2.7 or greater, then XMLTreeBuilder
            # does what we want.
            parser = ElementTree.XMLParser()
        else:
            # Running under Python 2.6, so we need to use our
            # subclass of XMLTreeBuilder instead.
            parser = ForcedEncodingXMLTreeBuilder(encoding="utf-8")

        etree = ElementTree.parse(fd, parser)
        bug = etree.find(".//bug")
        if bug is not None and bug.attrib.has_key("error"):
            return None
        else:
            return etree

    def modify(
        self,
        bugid,
        title=None,
        comment=None,
        url=None,
        status=None,
        resolution=None,
        assigned_to=None,
        duplicate=0,
        priority=None,
        severity=None,
        add_cc=[],
        remove_cc=[],
        add_dependson=[],
        remove_dependson=[],
        add_blocked=[],
        remove_blocked=[],
        whiteboard=None,
        keywords=None,
        component=None,
    ):
        """Modify an existing bug

		@param bugid: bug id
		@type  bugid: int
		@keyword title: new title for bug
		@type    title: string
		@keyword comment: comment to add
		@type    comment: string
		@keyword url: new url
		@type    url: string
		@keyword status: new status (note, if you are changing it to RESOLVED, you need to set {resolution} as well.
		@type    status: string
		@keyword resolution: new resolution (if status=RESOLVED)
		@type    resolution: string
		@keyword assigned_to: email (needs to exist in bugzilla)
		@type    assigned_to: string
		@keyword duplicate: bug id to duplicate against (if resolution = DUPLICATE)
		@type    duplicate: int
		@keyword priority: new priority for bug
		@type    priority: string
		@keyword severity: new severity for bug
		@type    severity: string
		@keyword add_cc: list of emails to add to the cc list
		@type    add_cc: list of strings
		@keyword remove_cc: list of emails to remove from cc list
		@type    remove_cc: list of string.
		@keyword add_dependson: list of bug ids to add to the depend list
		@type    add_dependson: list of strings
		@keyword remove_dependson: list of bug ids to remove from depend list
		@type    remove_dependson: list of strings
		@keyword add_blocked: list of bug ids to add to the blocked list
		@type    add_blocked: list of strings
		@keyword remove_blocked: list of bug ids to remove from blocked list
		@type    remove_blocked: list of strings

		@keyword whiteboard: set status whiteboard
		@type    whiteboard: string
		@keyword keywords: set keywords
		@type    keywords: string
		@keyword component: set component
		@type    component: string

		@return: list of fields modified.
		@rtype: list of strings
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        buginfo = Bugz.get(self, bugid)
        if not buginfo:
            return False

        modified = []
        qparams = config.params["modify"].copy()
        qparams["id"] = bugid
        # NOTE: knob has been removed in bugzilla 4 and 3?
        qparams["knob"] = "none"

        # copy existing fields
        FIELDS = (
            "bug_file_loc",
            "bug_severity",
            "short_desc",
            "bug_status",
            "status_whiteboard",
            "keywords",
            "resolution",
            "op_sys",
            "priority",
            "version",
            "target_milestone",
            "assigned_to",
            "rep_platform",
            "product",
            "component",
            "token",
        )

        FIELDS_MULTI = ("blocked", "dependson")

        for field in FIELDS:
            try:
                qparams[field] = buginfo.find(".//%s" % field).text
                if qparams[field] is None:
                    del qparams[field]
            except:
                pass

        for field in FIELDS_MULTI:
            qparams[field] = [d.text for d in buginfo.findall(".//%s" % field) if d is not None and d.text is not None]

        # set 'knob' if we are changing the status/resolution
        # or trying to reassign the bug.
        if status:
            status = status.upper()
        if resolution:
            resolution = resolution.upper()

        if status and status != qparams["bug_status"]:
            # Bugzilla >= 3.x
            qparams["bug_status"] = status

            if status == "RESOLVED":
                qparams["knob"] = "resolve"
                if resolution:
                    qparams["resolution"] = resolution
                else:
                    qparams["resolution"] = "FIXED"

                modified.append(("status", status))
                modified.append(("resolution", qparams["resolution"]))
            elif status == "ASSIGNED" or status == "IN_PROGRESS":
                qparams["knob"] = "accept"
                modified.append(("status", status))
            elif status == "REOPENED":
                qparams["knob"] = "reopen"
                modified.append(("status", status))
            elif status == "VERIFIED":
                qparams["knob"] = "verified"
                modified.append(("status", status))
            elif status == "CLOSED":
                qparams["knob"] = "closed"
                modified.append(("status", status))
        elif duplicate:
            # Bugzilla >= 3.x
            qparams["bug_status"] = "RESOLVED"
            qparams["resolution"] = "DUPLICATE"

            qparams["knob"] = "duplicate"
            qparams["dup_id"] = duplicate
            modified.append(("status", "RESOLVED"))
            modified.append(("resolution", "DUPLICATE"))
        elif assigned_to:
            qparams["knob"] = "reassign"
            qparams["assigned_to"] = assigned_to
            modified.append(("assigned_to", assigned_to))

        # setup modification of other bits
        if comment:
            qparams["comment"] = comment
            modified.append(("comment", ellipsis(comment, 60)))
        if title:
            qparams["short_desc"] = title or ""
            modified.append(("title", title))
        if url is not None:
            qparams["bug_file_loc"] = url
            modified.append(("url", url))
        if severity is not None:
            qparams["bug_severity"] = severity
            modified.append(("severity", severity))
        if priority is not None:
            qparams["priority"] = priority
            modified.append(("priority", priority))

        # cc manipulation
        if add_cc is not None:
            qparams["newcc"] = ", ".join(add_cc)
            modified.append(("newcc", qparams["newcc"]))
        if remove_cc is not None:
            qparams["cc"] = remove_cc
            qparams["removecc"] = "on"
            modified.append(("cc", remove_cc))

        # bug depend/blocked manipulation
        changed_dependson = False
        changed_blocked = False
        if remove_dependson:
            for bug_id in remove_dependson:
                qparams["dependson"].remove(str(bug_id))
                changed_dependson = True
        if remove_blocked:
            for bug_id in remove_blocked:
                qparams["blocked"].remove(str(bug_id))
                changed_blocked = True
        if add_dependson:
            for bug_id in add_dependson:
                qparams["dependson"].append(str(bug_id))
                changed_dependson = True
        if add_blocked:
            for bug_id in add_blocked:
                qparams["blocked"].append(str(bug_id))
                changed_blocked = True

        qparams["dependson"] = ",".join(qparams["dependson"])
        qparams["blocked"] = ",".join(qparams["blocked"])
        if changed_dependson:
            modified.append(("dependson", qparams["dependson"]))
        if changed_blocked:
            modified.append(("blocked", qparams["blocked"]))

        if whiteboard is not None:
            qparams["status_whiteboard"] = whiteboard
            modified.append(("status_whiteboard", whiteboard))
        if keywords is not None:
            qparams["keywords"] = keywords
            modified.append(("keywords", keywords))
        if component is not None:
            qparams["component"] = component
            modified.append(("component", component))

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["modify"])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)

        try:
            resp = self.opener.open(req)
            re_error = re.compile(r'id="error_msg".*>([^<]+)<')
            error = re_error.search(resp.read())
            if error:
                print error.group(1)
                return []
            return modified
        except:
            return []

    def attachment(self, attachid):
        """Get an attachment by attachment_id

		@param attachid: attachment id
		@type  attachid: int

		@return: dict with three keys, 'filename', 'size', 'fd'
		@rtype: dict
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["attach"].copy()
        qparams["id"] = attachid

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["attach"])
        req_url += "?" + req_params
        req = Request(req_url, None, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        try:
            content_type = resp.info()["Content-type"]
            namefield = content_type.split(";")[1]
            filename = re.search(r"name=\"(.*)\"", namefield).group(1)
            content_length = int(resp.info()["Content-length"], 0)
            return {"filename": filename, "size": content_length, "fd": resp}
        except:
            return {}

    def post(
        self,
        product,
        component,
        title,
        description,
        url="",
        assigned_to="",
        cc="",
        keywords="",
        version="",
        dependson="",
        blocked="",
        priority="",
        severity="",
    ):
        """Post a bug

		@param product: product where the bug should be placed
		@type product: string
		@param component: component where the bug should be placed
		@type component: string
		@param title: title of the bug.
		@type  title: string
		@param description: description of the bug
		@type  description: string
		@keyword url: optional url to submit with bug
		@type url: string
		@keyword assigned_to: optional email to assign bug to
		@type assigned_to: string.
		@keyword cc: optional list of CC'd emails
		@type: string
		@keyword keywords: optional list of bugzilla keywords
		@type: string
		@keyword version: version of the component
		@type: string
		@keyword dependson: bugs this one depends on
		@type: string
		@keyword blocked: bugs this one blocks
		@type: string
		@keyword priority: priority of this bug
		@type: string
		@keyword severity: severity of this bug
		@type: string

		@rtype: int
		@return: the bug number, or 0 if submission failed.
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["post"].copy()
        qparams["product"] = product
        qparams["component"] = component
        qparams["short_desc"] = title
        qparams["comment"] = description
        qparams["assigned_to"] = assigned_to
        qparams["cc"] = cc
        qparams["bug_file_loc"] = url
        qparams["dependson"] = dependson
        qparams["blocked"] = blocked
        qparams["keywords"] = keywords

        # XXX: default version is 'unspecified'
        if version != "":
            qparams["version"] = version

        # XXX: default priority is 'Normal'
        if priority != "":
            qparams["priority"] = priority

        # XXX: default severity is 'normal'
        if severity != "":
            qparams["bug_severity"] = severity

        req_params = urlencode(qparams, True)
        req_url = urljoin(self.base, config.urls["post"])
        req = Request(req_url, req_params, config.headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        try:
            re_bug = re.compile(r"(?:\s+)?<title>.*Bug ([0-9]+) Submitted.*</title>")
            bug_match = re_bug.search(resp.read())
            if bug_match:
                return int(bug_match.group(1))
        except:
            pass

        return 0

    def attach(self, bugid, title, description, filename, content_type="text/plain", ispatch=False):
        """Attach a file to a bug.

		@param bugid: bug id
		@type  bugid: int
		@param title: short description of attachment
		@type  title: string
		@param description: long description of the attachment
		@type  description: string
		@param filename: filename of the attachment
		@type  filename: string
		@keyword content_type: mime-type of the attachment
		@type content_type: string

		@rtype: bool
		@return: True if successful, False if not successful.
		"""
        if not self.authenticated and not self.skip_auth:
            self.auth()

        qparams = config.params["attach_post"].copy()
        qparams["bugid"] = bugid
        qparams["description"] = title
        qparams["comment"] = description
        if ispatch:
            qparams["ispatch"] = "1"
            qparams["contenttypeentry"] = "text/plain"
        else:
            qparams["contenttypeentry"] = content_type

        filedata = [("data", filename, open(filename).read())]
        content_type, body = encode_multipart_formdata(qparams.items(), filedata)

        req_headers = config.headers.copy()
        req_headers["Content-type"] = content_type
        req_headers["Content-length"] = len(body)
        req_url = urljoin(self.base, config.urls["attach_post"])
        req = Request(req_url, body, req_headers)
        if self.httpuser and self.httppassword:
            base64string = base64.encodestring("%s:%s" % (self.httpuser, self.httppassword))[:-1]
            req.add_header("Authorization", "Basic %s" % base64string)
        resp = self.opener.open(req)

        # TODO: return attachment id and success?
        try:
            re_attach = re.compile(r"<title>(.+)</title>")
            # Bugzilla 3/4
            re_attach34 = re.compile(r"Attachment \d+ added to Bug \d+")
            response = resp.read()
            attach_match = re_attach.search(response)
            if attach_match:
                if attach_match.group(1) == "Changes Submitted" or re_attach34.match(attach_match.group(1)):
                    return True
                else:
                    return attach_match.group(1)
            else:
                return False
        except:
            pass

        return False
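
A short, hedged end-to-end sketch for the Bugz client above; the base URL is
a placeholder, and the module-level config must be set up as the class
expects:

    bugz = Bugz("https://bugs.example.org/", user="me", password="secret")
    etree = bugz.get(12345)
    if etree is not None:
        print etree.find(".//short_desc").text
    # add a comment and close the bug as FIXED
    changed = bugz.modify(12345, comment="Fixed in 1.2.", status="RESOLVED",
                          resolution="FIXED")
    print changed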
Example n. 24
0
class ControlPlaneClient(object):
    """
    """

    def __init__(self, user, password, host=None, port=None):
        """
        """
        self._cj = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(self._cj)
        )
        # Zproxy always provides a proxy to serviced on port 443
        self._server = {
            "host": "127.0.0.1",
            "port": 443,
        }
        self._creds = {"username": user, "password": password}
        self._netloc = "%(host)s:%(port)s" % self._server
        self.cc_version = getCCVersion()
        self._hothOrNewer = self.cc_version != "1.1.X"
        self._useHttps = self._checkUseHttps()
        self._v2loc = "/api/v2"
        self._servicesEndpoint = "%s/services" % self._v2loc

    def _checkUseHttps(self):
        """
        Starting in CC 1.2.0, port 443 in the containers does not support https.
        """
        use_https = True
        cc_master = self._server.get("host")
        if self._hothOrNewer and cc_master in [ "localhost", "127.0.0.1" ]:
            use_https = False
        return use_https

    def queryServices(self, name=None, tags=None, tenantID=None):
        """
        Returns a sequence of ServiceDefinition objects that match
        the given requirements.
        """
        query = {}
        if name:
            namepat = fnmatch.translate(name)
            # controlplane regex accepts \z, not \Z.
            namepat = namepat.replace("\\Z", "\\z")
            query["name"] = namepat
        if tags:
            if isinstance(tags, (str, unicode)):
                tags = [tags]
            query["tags"] = ','.join(tags)
        if tenantID:
            query["tenantID"] = tenantID
        response = self._dorequest(self._servicesEndpoint, query=query)
        body = ''.join(response.readlines())
        response.close()
        decoded = ServiceJsonDecoder().decode(body)
        if decoded is None:
            decoded = []
        return decoded
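
    # Example (a sketch): shell-style name patterns are translated to the
    # regex dialect the control plane expects, so
    #
    #     client.queryServices(name="zope*", tags=["daemon"])
    #
    # issues GET /api/v2/services with query {"name": "zope.*\z(?ms)",
    # "tags": "daemon"} (the \Z -> \z rewrite happens above; the translated
    # pattern shown assumes Python 2's fnmatch.translate()).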

    def getService(self, serviceId, default=None):
        """
        Returns the ServiceDefinition object for the given service.
        """
        response = self._dorequest("/services/%s" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def getChangesSince(self, age):
        """
        Returns a sequence of ServiceDefinition objects that have changed
        within the given age.  If there are no changes, an empty sequence
        is returned.

        :param age: How far back to look, in milliseconds, for changes.
        """
        query = {"since": age}
        response = self._dorequest(self._servicesEndpoint, query=query)
        body = ''.join(response.readlines())
        response.close()
        decoded = ServiceJsonDecoder().decode(body)
        if decoded is None:
            decoded = []
        return decoded

    def updateServiceProperty(self, service, prop):
        """
        Updates the launch property of a service.

        :param ServiceDefinition service: The modified definition
        """
        oldService = self.getService(service.id)
        oldService._data[prop] = service._data[prop]
        body = ServiceJsonEncoder().encode(oldService)
        LOG.info("Updating prop '%s' for service '%s':%s resourceId=%s", prop, service.name, service.id, service.resourceId)
        LOG.debug("Updating service %s", body)
        response = self._dorequest(
            service.resourceId, method="PUT", data=body
        )
        body = ''.join(response.readlines())
        response.close()

    def updateService(self, service):
        """
        Updates the definition/state of a service.

        :param ServiceDefinition service: The modified definition
        """
        body = ServiceJsonEncoder().encode(service)
        LOG.info("Updating service '%s':%s", service.name, service.id)
        LOG.debug("Updating service %s", body)
        response = self._dorequest(
            service.resourceId, method="PUT", data=body
        )
        body = ''.join(response.readlines())
        response.close()

    def startService(self, serviceId):
        """
        Start the given service

        :param string serviceId: The service to start
        """
        LOG.info("Starting service '%s'", serviceId)
        response = self._dorequest("/services/%s/startService" % serviceId,
                                   method='PUT')
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def stopService(self, serviceId):
        """
        Stop the given service

        :param string serviceId: The service to stop
        """
        LOG.info("Stopping service %s", serviceId)
        response = self._dorequest("/services/%s/stopService" % serviceId,
                                   method='PUT')
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def addService(self, serviceDefinition):
        """
        Add a new service

        :param string serviceDefinition: json encoded representation of service
        :returns string: json encoded representation of new service's links
        """
        LOG.info("Adding service")
        LOG.debug(serviceDefinition)
        response = self._dorequest(
            "/services/add", method="POST", data=serviceDefinition
        )
        body = ''.join(response.readlines())
        response.close()
        return body

    def deleteService(self, serviceId):
        """
        Delete a service

        :param string serviceId: Id of the service to delete
        """
        LOG.info("Removing service %s", serviceId)
        response = self._dorequest(
            "/services/%s" % serviceId, method="DELETE"
        )
        response.close()

    def deployService(self, parentId, service):
        """
        Deploy a new service

        :param string parentId: parent service id
        :param string service: json encoded representation of service
        :returns string: json encoded representation of new service's links
        """
        LOG.info("Deploying service")
        data = {
            'ParentID': parentId,
            'Service': json.loads(service)
        }
        LOG.debug(data)
        response = self._dorequest(
            "/services/deploy", method="POST", data=json.dumps(data)
        )
        body = ''.join(response.readlines())
        response.close()
        return body

    def queryServiceInstances(self, serviceId):
        """
        Returns a sequence of ServiceInstance objects.
        """
        response = self._dorequest("/services/%s/running" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)


    def queryServiceStatus(self, serviceId):
        """
        CC version-independent call to get the status of a service.
        Calls queryServiceStatusImpl or queryServiceInstancesV2 to get the
        status for serviceId.

        :param serviceId: The serviceId to get the status of
        :type serviceId: string

        :returns: The result of the query decoded
        :rtype: dict of ServiceStatus objects with ID as key
        """
        if self._hothOrNewer:
            raw = self.queryServiceInstancesV2(serviceId)
            decoded = self._convertInstancesV2ToStatuses(raw)
        else:
            decoded = self.queryServiceStatusImpl(serviceId)

        return decoded

    def queryServiceStatusImpl(self, serviceId):
        """
        Implementation for queryServiceStatus that uses the
        /services/:serviceid/status endpoint.

        :param serviceId: The serviceId to get the status of
        :type serviceId: string

        :returns: The result of the query decoded
        :rtype: dict of ServiceStatus objects with ID as key
        """
        response = self._dorequest("/services/%s/status" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        decoded = ServiceStatusJsonDecoder().decode(body)
        return decoded

    def queryServiceInstancesV2(self, serviceId):
        """
        Uses the CC V2 api to query the instances of serviceId.

        :param serviceId: The serviceId to get the instances of
        :type serviceId: string

        :returns: The raw result of the query
        :rtype: json formatted string
        """
        response = self._dorequest("%s/services/%s/instances" % (self._v2loc,
                                                                 serviceId))
        body = ''.join(response.readlines())
        response.close()
        return body

    def _convertInstancesV2ToStatuses(self, rawV2Instance):
        """
        Converts a list of raw Instance (V2) json to a dict of ServiceStatuses.
        This is for compatibility sake.

        :param rawV2Instance: The result from a call to queryServiceInstancesV2
        :type rawV2Instance: json formatted string

        :returns: An acceptable output from queryServiceStatus
        :rtype: dict of ServiceStatus objects with ID as key
        """
        decoded = InstanceV2ToServiceStatusJsonDecoder().decode(rawV2Instance)
        # V2 gives us a list, we need a dict with ID as key
        decoded = {instance.id: instance for instance in decoded}
        return decoded
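
    # Example (added for illustration): a V2 payload decoding to instances
    # with ids "i1" and "i2" is reshaped by the comprehension above into
    # {"i1": <ServiceStatus>, "i2": <ServiceStatus>}, the same shape that
    # queryServiceStatusImpl() returns on older CCs.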


    def queryHosts(self):
        """
        Returns a sequence of Host objects.
        """
        response = self._dorequest("/hosts")
        body = ''.join(response.readlines())
        response.close()
        return HostJsonDecoder().decode(body)

    def getHost(self, hostId):
        """
        Returns the Host object for the given host ID.
        """
        response = self._dorequest("/hosts/%s" % hostId)
        body = ''.join(response.readlines())
        response.close()
        return HostJsonDecoder().decode(body)

    def getInstance(self, serviceId, instanceId, default=None):
        """
        Returns the requested ServiceInstance object.
        """
        response = self._dorequest(
            "/services/%s/running/%s" % (serviceId, instanceId)
        )
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def getServiceLog(self, serviceId, start=0, end=None):
        """
        """
        response = self._dorequest("/services/%s/logs" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        log = json.loads(body)
        return log["Detail"]

    def getInstanceLog(self, serviceId, instanceId, start=0, end=None):
        """
        """
        response = self._dorequest(
            "/services/%s/%s/logs" % (serviceId, instanceId)
        )
        body = ''.join(response.readlines())
        response.close()
        log = json.loads(body)
        return str(log["Detail"])

    def killInstance(self, hostId, uuid):
        """
        """
        response = self._dorequest(
            "/hosts/%s/%s" % (hostId, uuid), method="DELETE"
        )
        response.close()

    def getServicesForMigration(self, serviceId):
        """
        """
        query = {"includeChildren": "true"}
        response = self._dorequest("/services/%s" % serviceId, query=query)
        body = ''.join(response.readlines())
        response.close()
        return json.loads(body)

    def postServicesForMigration(self, data, serviceId):
        """
        """
        response = self._dorequest(
            "/services/%s/migrate" % serviceId, method="POST", data=data
        )
        body = ''.join(response.readlines())
        response.close()
        return body

    def getPoolsData(self):
        """
        Get all the pools and return raw json
        """
        response = self._dorequest("/pools")
        body = ''.join(response.readlines())
        response.close()
        return body

    def getHostsData(self):
        """
        Get all the pools and return raw json
        """
        response = self._dorequest("/hosts")
        body = ''.join(response.readlines())
        response.close()
        return body

    def getRunningServicesData(self):
        """
        Get all the running services and return raw json
        """
        body = ''
        if not self._hothOrNewer:
            response = self._dorequest("/running")
            body = ''.join(response.readlines())
            response.close()
        else:
            hostsData = self.queryHosts()
            for hostID in hostsData:
                response = self._dorequest("/hosts/%s/running" %hostID)
                body = body + ''.join(response.readlines())
                response.close()
        return body
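
    # Note (added): on newer CCs the loop above appends one JSON document
    # per host to the result, so the combined string is not itself valid
    # JSON; callers presumably split or decode it host by host.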

    def getStorageData(self):
        """
        Get the storage information and return raw json
        """
        response = self._dorequest("/storage")
        body = ''.join(response.readlines())
        response.close()
        return body

    def _makeRequest(self, uri, method=None, data=None, query=None):
        query = urllib.urlencode(query) if query else ""
        url = urlunparse(("https" if self._useHttps else "http",
                          self._netloc, uri, "", query, ""))
        args = {}
        if method:
            args["method"] = method
        if data:
            args["data"] = data
            args["headers"] = {"Content-Type": "application/json"}
        return _Request(url, **args)

    def _login(self):
        # Clear the cookie jar before logging in.
        self._cj.clear()
        encodedbody = json.dumps(self._creds)
        request = self._makeRequest("/login", data=encodedbody)
        response = self._opener.open(request)
        response.close()
        self._opener.close()

    def _dorequest(self, uri, method=None, data=None, query=None):
        # Try to perform the request up to five times
        for trycount in range(5):
            request = self._makeRequest(uri, method=method, data=data, query=query)
            try:
                return self._opener.open(request)
            except urllib2.HTTPError as ex:
                if ex.getcode() == 401:
                    self._login()
                    continue
                elif ex.getcode() == 500:
                    # Make the exception prettier and reraise it
                    try:
                        msg = json.load(ex)
                    except ValueError:
                        raise ex  # This stinks because we lose the stack
                    detail = msg.get('Detail')
                    if not detail:
                        raise
                    detail = detail.replace("Internal Server Error: ", "")
                    raise ControlCenterError(detail)
                raise
            #   The CC server resets the connection when an unauthenticated POST request is
            # made.  Depending on when during the request lifecycle the connection is reset,
            # we can get either a URLError with a socket.error as the reason, or a naked
            # socket.error.  In either case, socket.error.errno indicates that the
            # connection was reset with an errno of ECONNRESET (104).
            #   When we get a connection-reset exception, assume that the reset was caused
            # by lack of authentication, log in, and retry the request.
            except urllib2.URLError as ex:
                reason = ex.reason
                if type(reason) == socket_error and reason.errno == ECONNRESET:
                    self._login()
                    continue
                raise
            except socket_error as ex:
                if ex.errno == ECONNRESET:
                    self._login()
                    continue
                raise
            else:
                # break the loop so we skip the loop's else clause
                break

        else:
            # raises the last exception that was raised (the 401 error)
            raise
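
        # Note (added): the bare "raise" in the for-loop's else clause relies
        # on Python 2 semantics, where a bare raise outside an except block
        # re-raises the most recently handled exception (here, the 401 from
        # the last of the five attempts).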

    def _get_cookie_jar(self):
        return self._cj

    def cookies(self):
        """
        Get the cookie(s) being used.  If the cookie/cookiejar implementation
        changes, this method should be revisited.

        Return a list of dicts of cookies of the form:
            {
                'name':  'cookieName',
                'value': 'cookieValue',
                'domain': 'cookieDomain',
                'path': 'cookiePath',
                'expires': seconds from epoch to expire cookie, # leave blank to be a session cookie
                'secure': False/True,
            }
        """
        self._login()
        cookies = []
        for cookie in self._get_cookie_jar():
            cookies.append(
                {
                    'name': cookie.name,
                    'value': cookie.value,
                    'domain': cookie.domain,
                    'path': cookie.path,
                    'expires': cookie.expires,
                    'secure': cookie.secure
                }
            )
        return cookies
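
# Usage sketch (added for illustration; not part of the original example).
# It assumes the class above is the ControlPlaneClient shown again in a later
# example and that __init__ takes (user, password), as _creds suggests; the
# credentials and glob pattern below are hypothetical.
if __name__ == "__main__":
    client = ControlPlaneClient("zenoss", "secret")
    # Glob-match service names; queryServices() translates the pattern with
    # fnmatch and rewrites the \Z anchor for the controlplane.
    for svc in client.queryServices(name="Zope*"):
        print "%s (%s)" % (svc.name, svc.id)
    # cookies() logs in and returns the session cookies as plain dicts.
    for ck in client.cookies():
        print "%(name)s=%(value)s; domain=%(domain)s" % ck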
Esempio n. 25
0
class Site(object):
    """
    **EarwigBot: Wiki Toolset: Site**

    Represents a site, with support for API queries and returning
    :py:class:`~earwigbot.wiki.page.Page`,
    :py:class:`~earwigbot.wiki.user.User`,
    and :py:class:`~earwigbot.wiki.category.Category` objects. The constructor
    takes a bunch of arguments and you probably won't need to call it directly,
    rather :py:meth:`wiki.get_site() <earwigbot.wiki.sitesdb.SitesDB.get_site>`
    for returning :py:class:`Site`
    instances, :py:meth:`wiki.add_site()
    <earwigbot.wiki.sitesdb.SitesDB.add_site>` for adding new ones to our
    database, and :py:meth:`wiki.remove_site()
    <earwigbot.wiki.sitesdb.SitesDB.remove_site>` for removing old ones from
    our database, should suffice.

    *Attributes:*

    - :py:attr:`name`:    the site's name (or "wikiid"), like ``"enwiki"``
    - :py:attr:`project`: the site's project name, like ``"wikipedia"``
    - :py:attr:`lang`:    the site's language code, like ``"en"``
    - :py:attr:`domain`:  the site's web domain, like ``"en.wikipedia.org"``
    - :py:attr:`url`:     the site's URL, like ``"https://en.wikipedia.org"``

    *Public methods:*

    - :py:meth:`api_query`:            does an API query with kwargs as params
    - :py:meth:`sql_query`:            does an SQL query and yields its results
    - :py:meth:`get_maxlag`:           returns the internal database lag
    - :py:meth:`get_replag`:           estimates the external database lag
    - :py:meth:`get_token`:            gets a token for a specific API action
    - :py:meth:`namespace_id_to_name`: returns names associated with an NS id
    - :py:meth:`namespace_name_to_id`: returns the ID associated with a NS name
    - :py:meth:`get_page`:             returns a Page for the given title
    - :py:meth:`get_category`:         returns a Category for the given title
    - :py:meth:`get_user`:             returns a User object for the given name
    - :py:meth:`delegate`:             controls when the API or SQL is used
    """
    SERVICE_API = 1
    SERVICE_SQL = 2
    SPECIAL_TOKENS = [
        "createaccount", "deleteglobalaccount", "login", "patrol", "rollback",
        "setglobalaccountstatus", "userrights", "watch"
    ]

    def __init__(self,
                 name=None,
                 project=None,
                 lang=None,
                 base_url=None,
                 article_path=None,
                 script_path=None,
                 sql=None,
                 namespaces=None,
                 login=(None, None),
                 oauth=None,
                 cookiejar=None,
                 user_agent=None,
                 use_https=True,
                 assert_edit=None,
                 maxlag=None,
                 wait_between_queries=2,
                 logger=None,
                 search_config=None):
        """Constructor for new Site instances.

        This probably isn't necessary to call yourself unless you're building a
        Site that's not in your config and you don't want to add it - normally
        all you need is wiki.get_site(name), which creates the Site for you
        based on your config file and the sites database. We accept a bunch of
        kwargs, but the only ones you really "need" are *base_url* and
        *script_path*; this is enough to figure out an API url. *login*, a
        tuple of (username, password), can be used to log in using the legacy
        BotPasswords system; otherwise, a dict of OAuth info should be provided
        to *oauth*. *cookiejar* will be used to store cookies, and we'll use a
        normal CookieJar if none is given.

        First, we'll store the given arguments as attributes, then set up our
        requests session. We'll load any of the attributes that weren't given
        from the API, and then log in if a username/pass was given and we
        aren't already logged in.
        """
        # Attributes referring to site information, filled in by an API query
        # if they are missing (and an API url can be determined):
        self._name = name
        self._project = project
        self._lang = lang
        self._base_url = base_url
        self._article_path = article_path
        self._script_path = script_path
        self._namespaces = namespaces

        # Attributes used for API queries:
        self._use_https = use_https
        self._assert_edit = assert_edit
        self._maxlag = maxlag
        self._wait_between_queries = wait_between_queries
        self._max_retries = 6
        self._last_query_time = 0
        self._tokens = {}
        self._api_lock = RLock()
        self._api_info_cache = {"maxlag": 0, "lastcheck": 0}

        # Attributes used for SQL queries:
        if sql:
            self._sql_data = sql
        else:
            self._sql_data = {}
        self._sql_conn = None
        self._sql_lock = RLock()
        self._sql_info_cache = {"replag": 0, "lastcheck": 0, "usable": None}

        # Attribute used in copyright violation checks (see CopyrightMixIn):
        if search_config:
            self._search_config = search_config
        else:
            self._search_config = {}

        # Set up cookiejar and requests session for making API queries:
        if cookiejar is not None:
            self._cookiejar = cookiejar
        else:
            self._cookiejar = CookieJar()
        self._last_cookiejar_save = None
        if not user_agent:
            user_agent = constants.USER_AGENT  # Set default UA
        self._oauth = oauth
        self._session = requests.Session()
        self._session.cookies = self._cookiejar
        self._session.headers["User-Agent"] = user_agent
        if oauth:
            self._session.auth = OAuth1(oauth["consumer_token"],
                                        oauth["consumer_secret"],
                                        oauth["access_token"],
                                        oauth["access_secret"])

        # Set up our internal logger:
        if logger:
            self._logger = logger
        else:  # Just set up a null logger to eat up our messages:
            self._logger = getLogger("earwigbot.wiki")
            self._logger.addHandler(NullHandler())

        # Get all of the above attributes that were not specified as arguments:
        self._load_attributes()

        # If we have a name/pass and the API says we're not logged in, log in:
        self._login_info = name, password = login
        if not self._oauth and name and password:
            logged_in_as = self._get_username_from_cookies()
            if not logged_in_as or name.replace("_", " ") != logged_in_as:
                self._login(login)

    def __repr__(self):
        """Return the canonical string representation of the Site."""
        res = ", ".join(
            ("Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
             "base_url={_base_url!r}", "article_path={_article_path!r}",
             "script_path={_script_path!r}", "use_https={_use_https!r}",
             "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
             "sql={_sql_data!r}", "login={0}", "oauth={1}", "user_agent={3!r}",
             "cookiejar={2})"))
        name, password = self._login_info
        login = "******".format(repr(name), "hidden" if password else None)
        oauth = "hidden" if self._oauth else None
        cookies = self._cookiejar.__class__.__name__
        if hasattr(self._cookiejar, "filename"):
            cookies += "({0!r})".format(getattr(self._cookiejar, "filename"))
        else:
            cookies += "()"
        agent = self.user_agent
        return res.format(login, oauth, cookies, agent, **self.__dict__)

    def __str__(self):
        """Return a nice string representation of the Site."""
        res = "<Site {0} ({1}:{2}) at {3}>"
        return res.format(self.name, self.project, self.lang, self.domain)

    def _unicodeify(self, value, encoding="utf8"):
        """Return input as unicode if it's not unicode to begin with."""
        if isinstance(value, unicode):
            return value
        return unicode(value, encoding)

    def _urlencode_utf8(self, params):
        """Implement urllib.urlencode() with support for unicode input."""
        enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
        args = []
        for key, val in params.iteritems():
            key = quote_plus(enc(key))
            val = quote_plus(enc(val))
            args.append(key + "=" + val)
        return "&".join(args)

    def _api_query(self,
                   params,
                   tries=0,
                   wait=5,
                   ignore_maxlag=False,
                   no_assert=False,
                   ae_retry=True):
        """Do an API query with *params* as a dict of parameters.

        See the documentation for :py:meth:`api_query` for full implementation
        details. *tries*, *wait*, and *ignore_maxlag* are for maxlag;
        *no_assert* and *ae_retry* are for AssertEdit.
        """
        since_last_query = time() - self._last_query_time  # Throttling support
        if since_last_query < self._wait_between_queries:
            wait_time = self._wait_between_queries - since_last_query
            log = "Throttled: waiting {0} seconds".format(round(wait_time, 2))
            self._logger.debug(log)
            sleep(wait_time)
        self._last_query_time = time()

        url, data = self._build_api_query(params, ignore_maxlag, no_assert)
        if "lgpassword" in params:
            self._logger.debug("{0} -> <hidden>".format(url))
        elif len(data) > 1000:
            self._logger.debug("{0} -> {1}...".format(url, data[:997]))
        else:
            self._logger.debug("{0} -> {1}".format(url, data))

        try:
            response = self._session.post(url, data=data)
            response.raise_for_status()
        except requests.RequestException as exc:
            raise exceptions.APIError("API query failed: {0}".format(exc))

        return self._handle_api_result(response, params, tries, wait, ae_retry)

    def _request_csrf_token(self, params):
        """If possible, add a request for a CSRF token to an API query."""
        if params.get("action") == "query":
            if params.get("meta"):
                if "tokens" not in params["meta"].split("|"):
                    params["meta"] += "|tokens"
            else:
                params["meta"] = "tokens"
            if params.get("type"):
                if "csrf" not in params["type"].split("|"):
                    params["type"] += "|csrf"

    def _build_api_query(self, params, ignore_maxlag, no_assert):
        """Given API query params, return the URL to query and POST data."""
        if not self._base_url or self._script_path is None:
            e = "Tried to do an API query, but no API URL is known."
            raise exceptions.APIError(e)

        url = self.url + self._script_path + "/api.php"
        params["format"] = "json"  # This is the only format we understand
        if self._assert_edit and not no_assert:
            # If requested, ensure that we're logged in
            params["assert"] = self._assert_edit
        if self._maxlag and not ignore_maxlag:
            # If requested, don't overload the servers:
            params["maxlag"] = self._maxlag
        if "csrf" not in self._tokens:
            # If we don't have a CSRF token, try to fetch one:
            self._request_csrf_token(params)

        data = self._urlencode_utf8(params)
        return url, data
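
    # Example (added for illustration; enwiki's "/w" script path assumed):
    # for params = {"action": "query", "meta": "siteinfo"} this returns
    #   url  = "https://en.wikipedia.org/w/api.php"
    #   data = "action=query&meta=siteinfo&format=json" (parameter order may
    # vary, plus assert/maxlag and a token request when configured).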

    def _handle_api_result(self, response, params, tries, wait, ae_retry):
        """Given an API query response, attempt to return useful data."""
        try:
            res = response.json()
        except ValueError:
            e = "API query failed: JSON could not be decoded."
            raise exceptions.APIError(e)

        if "warnings" in res:
            for name, value in res["warnings"].items():
                try:
                    warning = value["warnings"]
                except KeyError:
                    try:
                        warning = value["*"]
                    except KeyError:
                        warning = value
                self._logger.warning("API warning: %s: %s", name, warning)

        if self._should_save_cookiejar():
            self._save_cookiejar()

        try:
            code = res["error"]["code"]
            info = res["error"]["info"]
        except (TypeError, KeyError):  # If there's no error code/info, return
            if "query" in res and "tokens" in res["query"]:
                for name, token in res["query"]["tokens"].iteritems():
                    self._tokens[name.split("token")[0]] = token
            return res

        if code == "maxlag":  # We've been throttled by the server
            if tries >= self._max_retries:
                e = "Maximum number of retries reached ({0})."
                raise exceptions.APIError(e.format(self._max_retries))
            tries += 1
            msg = 'Server says "{0}"; retrying in {1} seconds ({2}/{3})'
            self._logger.info(msg.format(info, wait, tries, self._max_retries))
            sleep(wait)
            return self._api_query(params, tries, wait * 2, ae_retry=ae_retry)
        elif code in ["assertuserfailed", "assertbotfailed"]:  # AssertEdit
            if ae_retry and all(self._login_info) and not self._oauth:
                # Try to log in if we got logged out:
                self._login(self._login_info)
                if "token" in params:  # Fetch a new one; this is invalid now
                    params["token"] = self.get_token(params["action"])
                return self._api_query(params, tries, wait, ae_retry=False)
            if not all(self._login_info) and not self._oauth:
                e = "Assertion failed, and no login info was provided."
            elif code == "assertbotfailed":
                e = "Bot assertion failed: we don't have a bot flag!"
            else:
                e = "User assertion failed due to an unknown issue. Cookie or OAuth problem?"
            raise exceptions.PermissionsError("AssertEdit: " + e)
        else:  # Some unknown error occurred
            e = 'API query failed: got error "{0}"; server says: "{1}".'
            error = exceptions.APIError(e.format(code, info))
            error.code, error.info = code, info
            raise error

    def _load_attributes(self, force=False):
        """Load data about our Site from the API.

        This function is called by __init__() when one of the site attributes
        was not given as a keyword argument. We'll do an API query to get the
        missing data, but only if there actually *is* missing data.

        Additionally, you can call this with *force* set to True to forcibly
        reload all attributes.
        """
        # All attributes to be loaded, except _namespaces, which is a special
        # case because it requires additional params in the API query:
        attrs = [
            self._name, self._project, self._lang, self._base_url,
            self._article_path, self._script_path
        ]

        params = {"action": "query", "meta": "siteinfo", "siprop": "general"}

        if not self._namespaces or force:
            params["siprop"] += "|namespaces|namespacealiases"
            with self._api_lock:
                result = self._api_query(params, no_assert=True)
            self._load_namespaces(result)
        elif all(attrs):  # Everything is already specified and we're not told
            return  # to force a reload, so do nothing
        else:  # We're only loading attributes other than _namespaces
            with self._api_lock:
                result = self._api_query(params, no_assert=True)

        res = result["query"]["general"]
        self._name = res["wikiid"]
        self._project = res["sitename"].lower()
        self._lang = res["lang"]
        self._base_url = res["server"]
        self._article_path = res["articlepath"]
        self._script_path = res["scriptpath"]

    def _load_namespaces(self, result):
        """Fill self._namespaces with a dict of namespace IDs and names.

        Called by _load_attributes() with API data as *result* when
        self._namespaces was not given as a kwarg to __init__().
        """
        self._namespaces = {}

        for namespace in result["query"]["namespaces"].values():
            ns_id = namespace["id"]
            name = namespace["*"]
            try:
                canonical = namespace["canonical"]
            except KeyError:
                self._namespaces[ns_id] = [name]
            else:
                if name != canonical:
                    self._namespaces[ns_id] = [name, canonical]
                else:
                    self._namespaces[ns_id] = [name]

        for namespace in result["query"]["namespacealiases"]:
            ns_id = namespace["id"]
            alias = namespace["*"]
            self._namespaces[ns_id].append(alias)

    def _get_cookie(self, name, domain):
        """Return the named cookie unless it is expired or doesn't exist."""
        for cookie in self._cookiejar:
            if cookie.name == name and cookie.domain == domain:
                if cookie.is_expired():
                    break
                return cookie

    def _get_username_from_cookies(self):
        """Try to return our username based solely on cookies.

        First, we'll look for a cookie named self._name + "Token", like
        "enwikiToken". If it exists and isn't expired, we'll assume it's valid
        and try to return the value of the cookie self._name + "UserName" (like
        "enwikiUserName"). This should work fine on wikis without single-user
        login.

        If `enwikiToken` doesn't exist, we'll try to find a cookie named
        `centralauth_Token`. If this exists and is not expired, we'll try to
        return the value of `centralauth_User`.

        If we didn't get any matches, we'll return None. Our goal here isn't to
        return the most likely username, or what we *want* our username to be
        (for that, we'd do self._login_info[0]), but rather to get our current
        username without an unnecessary ?action=query&meta=userinfo API query.
        """
        name = ''.join((self._name, "Token"))
        cookie = self._get_cookie(name, self.domain)

        if cookie:
            name = ''.join((self._name, "UserName"))
            user_name = self._get_cookie(name, self.domain)
            if user_name:
                return unquote_plus(user_name.value)

        for cookie in self._cookiejar:
            if cookie.name != "centralauth_Token" or cookie.is_expired():
                continue
            base = cookie.domain
            if base.startswith(".") and not cookie.domain_initial_dot:
                base = base[1:]
            if self.domain.endswith(base):
                user_name = self._get_cookie("centralauth_User", cookie.domain)
                if user_name:
                    return unquote_plus(user_name.value)

    def _get_username_from_api(self):
        """Do a simple API query to get our username and return it.

        This is a reliable way to make sure we are actually logged in, because
        it doesn't deal with annoying cookie logic, but it results in an API
        query that is unnecessary in some cases.

        Called by _get_username() (in turn called by get_user() with no
        username argument) when cookie lookup fails, probably indicating that
        we are logged out.
        """
        result = self.api_query(action="query", meta="userinfo")
        return result["query"]["userinfo"]["name"]

    def _get_username(self):
        """Return the name of the current user, whether logged in or not.

        First, we'll try to deduce it solely from cookies, to avoid an
        unnecessary API query. For the cookie-detection method, see
        _get_username_from_cookies()'s docs.

        If our username isn't in cookies, then we're either using OAuth or
        we're probably not logged in, or something fishy is going on (like
        forced logout). If we're using OAuth and a username was configured,
        assume it is accurate and use it. Otherwise, do a single API query for
        our username (or IP address) and return that.
        """
        name = self._get_username_from_cookies()
        if name:
            return name
        if self._oauth and self._login_info[0]:
            return self._login_info[0]
        return self._get_username_from_api()

    def _should_save_cookiejar(self):
        """Return a bool indicating whether we should save the cookiejar.

        This is True if we haven't saved the cookiejar yet this session, or if
        our last save was over a day ago.
        """
        max_staleness = 60 * 60 * 24  # 1 day
        if not self._last_cookiejar_save:
            return True
        return time() - self._last_cookiejar_save > max_staleness

    def _save_cookiejar(self):
        """Try to save our cookiejar after doing a (normal) login or logout.

        Calls the standard .save() method with no filename. Don't fret if our
        cookiejar doesn't support saving (CookieJar raises AttributeError,
        FileCookieJar raises NotImplementedError) or no default filename was
        given (LWPCookieJar and MozillaCookieJar raise ValueError).
        """
        if hasattr(self._cookiejar, "save"):
            try:
                getattr(self._cookiejar, "save")()
            except (NotImplementedError, ValueError):
                pass
        self._last_cookiejar_save = time()

    def _login(self, login):
        """Safely login through the API.

        Normally, this is called by __init__() if a username and password have
        been provided and no valid login cookies were found. The only other
        time it needs to be called is when those cookies expire, which is done
        automatically by api_query() if a query fails.

        *login* is a (username, password) tuple.

        Raises LoginError on login errors (duh), like bad passwords and
        nonexistent usernames.
        """
        self._tokens.clear()
        name, password = login

        params = {"action": "query", "meta": "tokens", "type": "login"}
        with self._api_lock:
            result = self._api_query(params, no_assert=True)
        try:
            token = result["query"]["tokens"]["logintoken"]
        except KeyError:
            raise exceptions.LoginError("Couldn't get login token")

        params = {
            "action": "login",
            "lgname": name,
            "lgpassword": password,
            "lgtoken": token
        }
        with self._api_lock:
            result = self._api_query(params, no_assert=True)

        res = result["login"]["result"]
        if res == "Success":
            self._tokens.clear()
            self._save_cookiejar()
            return
        if res == "Illegal":
            e = "The provided username is illegal."
        elif res == "NotExists":
            e = "The provided username does not exist."
        elif res == "EmptyPass":
            e = "No password was given."
        elif res == "WrongPass" or res == "WrongPluginPass":
            e = "The given password is incorrect."
        else:
            e = "Couldn't login; server says '{0}'.".format(res)
        raise exceptions.LoginError(e)

    def _logout(self):
        """Safely logout through the API.

        We'll do a simple API request (api.php?action=logout), clear our
        cookiejar (which probably contains now-invalidated cookies) and try to
        save it, if it supports that sort of thing.
        """
        self.api_query(action="logout")
        self._cookiejar.clear()
        self._save_cookiejar()

    def _sql_connect(self, **kwargs):
        """Attempt to establish a connection with this site's SQL database.

        oursql.connect() will be called with self._sql_data as its kwargs.
        Any kwargs given to this function will be passed to connect() and will
        have precedence over the config file.

        Will raise SQLError() if the module "oursql" is not available. oursql
        may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
        establish a connection.
        """
        args = self._sql_data
        for key, value in kwargs.iteritems():
            args[key] = value
        if "read_default_file" not in args and "user" not in args and "passwd" not in args:
            args["read_default_file"] = expanduser("~/.my.cnf")
        elif "read_default_file" in args:
            args["read_default_file"] = expanduser(args["read_default_file"])
        if "autoping" not in args:
            args["autoping"] = True
        if "autoreconnect" not in args:
            args["autoreconnect"] = True

        try:
            self._sql_conn = oursql.connect(**args)
        except ImportError:
            e = "SQL querying requires the 'oursql' package: http://packages.python.org/oursql/"
            raise exceptions.SQLError(e)

    def _get_service_order(self):
        """Return a preferred order for using services (e.g. the API and SQL).

        A list is returned, starting with the most preferred service first and
        ending with the least preferred one. Currently, there are only two
        services. SERVICE_API will always be included since the API is expected
        to be always usable. In normal circumstances, self.SERVICE_SQL will be
        first (with the API second), since using SQL directly is easier on the
        servers than making web queries with the API. self.SERVICE_SQL will be
        second if replag is greater than five minutes (a cached value updated
        every two minutes at most), *unless* API lag is also very high.
        self.SERVICE_SQL will not be included in the list if we cannot form a
        proper SQL connection.
        """
        now = time()
        if now - self._sql_info_cache["lastcheck"] > 120:
            self._sql_info_cache["lastcheck"] = now
            try:
                self._sql_info_cache["replag"] = sqllag = self.get_replag()
            except (exceptions.SQLError, oursql.Error):
                self._sql_info_cache["usable"] = False
                return [self.SERVICE_API]
            self._sql_info_cache["usable"] = True
        else:
            if not self._sql_info_cache["usable"]:
                return [self.SERVICE_API]
            sqllag = self._sql_info_cache["replag"]

        if sqllag > 300:
            if not self._maxlag:
                return [self.SERVICE_API, self.SERVICE_SQL]
            if now - self._api_info_cache["lastcheck"] > 300:
                self._api_info_cache["lastcheck"] = now
                try:
                    self._api_info_cache["maxlag"] = apilag = self.get_maxlag()
                except exceptions.APIError:
                    self._api_info_cache["maxlag"] = apilag = 0
            else:
                apilag = self._api_info_cache["maxlag"]
            if apilag > self._maxlag:
                return [self.SERVICE_SQL, self.SERVICE_API]
            return [self.SERVICE_API, self.SERVICE_SQL]

        return [self.SERVICE_SQL, self.SERVICE_API]
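
    # Worked example (added): with replag = 60 s this returns
    # [SERVICE_SQL, SERVICE_API] immediately. With replag = 400 s and
    # maxlag = 10, the API is also checked: if get_maxlag() reports 15 s
    # (above maxlag), SQL is still preferred; if it reports 2 s, the API
    # is tried first.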

    @property
    def name(self):
        """The Site's name (or "wikiid" in the API), like ``"enwiki"``."""
        return self._name

    @property
    def project(self):
        """The Site's project name in lowercase, like ``"wikipedia"``."""
        return self._project

    @property
    def lang(self):
        """The Site's language code, like ``"en"`` or ``"es"``."""
        return self._lang

    @property
    def domain(self):
        """The Site's web domain, like ``"en.wikipedia.org"``."""
        return urlparse(self._base_url).netloc

    @property
    def url(self):
        """The Site's full base URL, like ``"https://en.wikipedia.org"``."""
        url = self._base_url
        if url.startswith("//"):  # Protocol-relative URLs from 1.18
            if self._use_https:
                url = "https:" + url
            else:
                url = "http:" + url
        return url

    @property
    def user_agent(self):
        """The User-Agent header sent to the API by the requests session."""
        return self._session.headers["User-Agent"]

    def api_query(self, **kwargs):
        """Do an API query with `kwargs` as the parameters.

        This will first attempt to construct an API url from
        :py:attr:`self._base_url` and :py:attr:`self._script_path`. We need
        both of these, or else we'll raise
        :py:exc:`~earwigbot.exceptions.APIError`. If
        :py:attr:`self._base_url` is protocol-relative (introduced in MediaWiki
        1.18), we'll choose HTTPS only if :py:attr:`self._use_https` is
        ``True``, otherwise HTTP.

        We'll encode the given params, adding ``format=json`` along the way, as
        well as ``&assert=`` and ``&maxlag=`` based on
        :py:attr:`self._assert_edit` and :py:attr:`_maxlag` respectively.
        Additionally, we'll sleep a bit if the last query was made fewer than
        :py:attr:`self._wait_between_queries` seconds ago. The request is made
        through :py:attr:`self._session`, which has cookie support
        (:py:attr:`self._cookiejar`) and a ``User-Agent``
        (:py:const:`earwigbot.wiki.constants.USER_AGENT`).

        Assuming everything went well, we'll gunzip the data (if compressed),
        load it as a JSON object, and return it.

        If our request failed for some reason, we'll raise
        :py:exc:`~earwigbot.exceptions.APIError` with details. If that
        reason was due to maxlag, we'll sleep for a bit and then repeat the
        query until we exceed :py:attr:`self._max_retries`.

        There is helpful MediaWiki API documentation at `MediaWiki.org
        <https://www.mediawiki.org/wiki/API>`_.
        """
        with self._api_lock:
            return self._api_query(kwargs)
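
    # Example (added for illustration; mirrors the docstring above):
    #
    #     res = site.api_query(action="query", prop="info", titles="Main Page")
    #     pages = res["query"]["pages"]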

    def sql_query(self,
                  query,
                  params=(),
                  plain_query=False,
                  dict_cursor=False,
                  cursor_class=None,
                  show_table=False,
                  buffsize=1024):
        """Do an SQL query and yield its results.

        If *plain_query* is ``True``, we will force an unparameterized query.
        Specifying both *params* and *plain_query* will cause an error. If
        *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
        our cursor, otherwise the default :py:class:`oursql.Cursor`. If
        *cursor_class* is given, it will override this option. If *show_table*
        is True, the name of the table will be prepended to the name of the
        column. This will mainly affect an :py:class:`~oursql.DictCursor`.

        *buffsize* is the size of each memory-buffered group of results, to
        reduce the number of conversations with the database; it is passed to
        :py:meth:`cursor.fetchmany() <oursql.Cursor.fetchmany>`. If set to
        ``0``, all results will be buffered in memory at once (this uses
        :py:meth:`fetchall() <oursql.Cursor.fetchall>`). If set to ``1``, it is
        equivalent to using :py:meth:`fetchone() <oursql.Cursor.fetchone>`.

        Example usage::

            >>> query = "SELECT user_id, user_registration FROM user WHERE user_name = ?"
            >>> params = ("The Earwig",)
            >>> result1 = site.sql_query(query, params)
            >>> result2 = site.sql_query(query, params, dict_cursor=True)
            >>> for row in result1: print row
            (7418060L, '20080703215134')
            >>> for row in result2: print row
            {'user_id': 7418060L, 'user_registration': '20080703215134'}

        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
        :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
        query.

        See :py:meth:`_sql_connect` for information on how a connection is
        acquired. Also relevant is `oursql's documentation
        <http://packages.python.org/oursql>`_ for details on that package.
        """
        if not cursor_class:
            if dict_cursor:
                cursor_class = oursql.DictCursor
            else:
                cursor_class = oursql.Cursor
        klass = cursor_class

        with self._sql_lock:
            if not self._sql_conn:
                self._sql_connect()
            with self._sql_conn.cursor(klass, show_table=show_table) as cur:
                cur.execute(query, params, plain_query)
                if buffsize:
                    while True:
                        group = cur.fetchmany(buffsize)
                        if not group:
                            return
                        for result in group:
                            yield result
                for result in cur.fetchall():
                    yield result

    def get_maxlag(self, showall=False):
        """Return the internal database replication lag in seconds.

        In a typical setup, this function returns the replication lag *within*
        the WMF's cluster, *not* external replication lag affecting the
        Toolserver (see :py:meth:`get_replag` for that). This is useful when
        combined with the ``maxlag`` API query param (added by config), in
        which queries will be halted and retried if the lag is too high,
        usually above five seconds.

        With *showall*, will return a list of the lag for all servers in the
        cluster, not just the one with the highest lag.
        """
        params = {"action": "query", "meta": "siteinfo", "siprop": "dbrepllag"}
        if showall:
            params["sishowalldb"] = 1
        with self._api_lock:
            result = self._api_query(params, ignore_maxlag=True)
        if showall:
            return [server["lag"] for server in result["query"]["dbrepllag"]]
        return result["query"]["dbrepllag"][0]["lag"]

    def get_replag(self):
        """Return the estimated external database replication lag in seconds.

        Requires SQL access. This function only makes sense on a replicated
        database (e.g. the Wikimedia Toolserver) and on a wiki that receives a
        large number of edits (ideally, at least one per second), or the result
        may be larger than expected, since it works by subtracting the current
        time from the timestamp of the latest recent changes event.

        This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
        :py:exc:`oursql.InterfaceError`, ...) if there were problems.
        """
        query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
                   recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
        result = list(self.sql_query(query))
        return int(result[0][0])
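
    # Worked example (added): if the newest recentchanges row was written at
    # 20:00:00 and UNIX_TIMESTAMP() is 20:00:07, the query returns 7, i.e.
    # an estimated seven seconds of external replication lag.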

    def get_token(self, action=None, force=False):
        """Return a token for a data-modifying API action.

        In general, this will be a CSRF token, unless *action* is in a special
        list of non-CSRF tokens. Tokens are cached for the session (until
        :meth:`_login` is called again); set *force* to ``True`` to force a new
        token to be fetched.

        Raises :exc:`.APIError` if there was an API issue.
        """
        if action not in self.SPECIAL_TOKENS:
            action = "csrf"
        if action in self._tokens and not force:
            return self._tokens[action]

        res = self.api_query(action="query", meta="tokens", type=action)
        if action not in self._tokens:
            err = "Tried to fetch a {0} token, but API returned: {1}"
            raise exceptions.APIError(err.format(action, res))
        return self._tokens[action]

    def namespace_id_to_name(self, ns_id, all=False):
        """Given a namespace ID, returns associated namespace names.

        If *all* is ``False`` (default), we'll return the first name in the
        list, which is usually the localized version. Otherwise, we'll return
        the entire list, which includes the canonical name. For example, this
        returns ``u"Wikipedia"`` if *ns_id* = ``4`` and *all* is ``False`` on
        ``enwiki``; returns ``[u"Wikipedia", u"Project", u"WP"]`` if *ns_id* =
        ``4`` and *all* is ``True``.

        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the ID
        is not found.
        """
        try:
            if all:
                return self._namespaces[ns_id]
            else:
                return self._namespaces[ns_id][0]
        except KeyError:
            e = "There is no namespace with id {0}.".format(ns_id)
            raise exceptions.NamespaceNotFoundError(e)

    def namespace_name_to_id(self, name):
        """Given a namespace name, returns the associated ID.

        Like :py:meth:`namespace_id_to_name`, but reversed. Case is ignored,
        because namespaces are assumed to be case-insensitive.

        Raises :py:exc:`~earwigbot.exceptions.NamespaceNotFoundError` if the
        name is not found.
        """
        lname = name.lower()
        for ns_id, names in self._namespaces.items():
            lnames = [n.lower() for n in names]  # Be case-insensitive
            if lname in lnames:
                return ns_id

        e = u"There is no namespace with name '{0}'.".format(name)
        raise exceptions.NamespaceNotFoundError(e)

    def get_page(self, title, follow_redirects=False, pageid=None):
        """Return a :py:class:`Page` object for the given title.

        *follow_redirects* is passed directly to
        :py:class:`~earwigbot.wiki.page.Page`'s constructor. Also, this will
        return a :py:class:`~earwigbot.wiki.category.Category` object instead
        if the given title is in the category namespace. As
        :py:class:`~earwigbot.wiki.category.Category` is a subclass of
        :py:class:`~earwigbot.wiki.page.Page`, this should not cause problems.

        Note that this doesn't do any direct checks for existence or
        redirect-following: :py:class:`~earwigbot.wiki.page.Page`'s methods
        provide that.
        """
        title = self._unicodeify(title)
        prefixes = self.namespace_id_to_name(constants.NS_CATEGORY, all=True)
        prefix = title.split(":", 1)[0]
        if prefix != title:  # Avoid a page that is simply "Category"
            if prefix in prefixes:
                return Category(self, title, follow_redirects, pageid,
                                self._logger)
        return Page(self, title, follow_redirects, pageid, self._logger)

    def get_category(self, catname, follow_redirects=False, pageid=None):
        """Return a :py:class:`Category` object for the given category name.

        *catname* should be given *without* a namespace prefix. This method is
        really just shorthand for :py:meth:`get_page("Category:" + catname)
        <get_page>`.
        """
        catname = self._unicodeify(catname)
        prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
        pagename = u':'.join((prefix, catname))
        return Category(self, pagename, follow_redirects, pageid, self._logger)

    def get_user(self, username=None):
        """Return a :py:class:`User` object for the given username.

        If *username* is left as ``None``, then a
        :py:class:`~earwigbot.wiki.user.User` object representing the currently
        logged-in (or anonymous!) user is returned.
        """
        if username:
            username = self._unicodeify(username)
        else:
            username = self._get_username()
        return User(self, username, self._logger)

    def delegate(self, services, args=None, kwargs=None):
        """Delegate a task to either the API or SQL depending on conditions.

        *services* should be a dictionary in which the key is the service name
        (:py:attr:`self.SERVICE_API <SERVICE_API>` or
        :py:attr:`self.SERVICE_SQL <SERVICE_SQL>`), and the value is the
        function to call for this service. All functions will be passed the
        same arguments: the tuple *args* and the dict *kwargs*, which are both
        empty by default. The service order is determined by
        :py:meth:`_get_service_order`.

        Not every service needs an entry in the dictionary. Will raise
        :py:exc:`~earwigbot.exceptions.NoServiceError` if an appropriate
        service cannot be found.
        """
        if not args:
            args = ()
        if not kwargs:
            kwargs = {}

        order = self._get_service_order()
        for srv in order:
            if srv in services:
                try:
                    return services[srv](*args, **kwargs)
                except exceptions.ServiceError:
                    continue
        raise exceptions.NoServiceError(services)
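
# Usage sketch (added for illustration; not part of the original example).
# Hypothetical API and SQL variants of one lookup, dispatched through
# Site.delegate() according to _get_service_order():
def _exists_via_api(site, title):
    # Ask the API whether the page exists.
    res = site.api_query(action="query", titles=title)
    return "missing" not in res["query"]["pages"].values()[0]

def _exists_via_sql(site, title):
    # Ask the replica database the same question.
    rows = site.sql_query("SELECT 1 FROM page WHERE page_title = ? LIMIT 1",
                          (title.replace(" ", "_"),))
    return bool(list(rows))

# exists = site.delegate({Site.SERVICE_API: _exists_via_api,
#                         Site.SERVICE_SQL: _exists_via_sql},
#                        args=(site, "Main Page"))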
Esempio n. 26
0
class ControlPlaneClient(object):
    """
    """

    def __init__(self, user, password, host=None, port=None):
        """
        """
        self._cj = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(self._cj)
        )
        self._server = {
            "host": host if host else _DEFAULT_HOST,
            "port": port if port else _DEFAULT_PORT,
        }
        self._creds = {"username": user, "password": password}
        self._netloc = "%(host)s:%(port)s" % self._server

    def queryServices(self, name=None, tags=None, tenantID=None):
        """
        Returns a sequence of ServiceDefinition objects that match
        the given requirements.
        """
        query = {}
        if name:
            namepat = fnmatch.translate(name)
            # controlplane regex accepts \z, not \Z.
            namepat = namepat.replace("\\Z", "\\z")
            query["name"] = namepat
        if tags:
            if isinstance(tags, (str, unicode)):
                tags = [tags]
            query["tags"] = ','.join(tags)
        if tenantID:
            query["tenantID"] = tenantID
        response = self._dorequest("/services", query=query)
        body = ''.join(response.readlines())
        response.close()
        decoded = ServiceJsonDecoder().decode(body)
        if decoded is None:
            decoded = []
        return decoded

    def getService(self, serviceId, default=None):
        """
        Returns the ServiceDefinition object for the given service.
        """
        response = self._dorequest("/services/%s" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def updateService(self, service):
        """
        Updates the definition/state of a service.

        :param ServiceDefinition service: The modified definition
        """
        body = ServiceJsonEncoder().encode(service)
        LOG.info("Updating service '%s':%s", service.name, service.id)
        LOG.debug("Updating service %s", body)
        response = self._dorequest(
            service.resourceId, method="PUT", data=body
        )
        body = ''.join(response.readlines())
        response.close()

    def startService(self, serviceId):
        """
        Start the given service

        :param string serviceId: The service to start
        """
        LOG.info("Starting service '%s'", serviceId)
        response = self._dorequest("/services/%s/startService" % serviceId,
                                   method='PUT')
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def stopService(self, serviceId):
        """
        Stop the given service

        :param string serviceId: The service to stop
        """
        LOG.info("Stopping service '%s", serviceId)
        response = self._dorequest("/services/%s/stopService" % serviceId,
                                   method='PUT')
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def addService(self, serviceDefinition):
        """
        Add a new service

        :param string serviceDefinition: json encoded representation of service
        :returns string: json encoded representation of new service's links
        """
        LOG.info("Adding service")
        LOG.debug(serviceDefinition)
        response = self._dorequest(
            "/services/add", method="POST", data=serviceDefinition
        )
        body = ''.join(response.readlines())
        response.close()
        return body

    def deleteService(self, serviceId):
        """
        Delete a service

        :param string serviceId: Id of the service to delete
        """
        LOG.info("Removing service %s", serviceId)
        response = self._dorequest(
            "/services/%s" % serviceId, method="DELETE"
        )
        response.close()

    def deployService(self, parentId, service):
        """
        Deploy a new service

        :param string parentId: parent service id
        :param string service: json encoded representation of service
        :returns string: json encoded representation of new service's links
        """
        LOG.info("Deploying service")
        data = {
            'ParentID': parentId,
            'Service': json.loads(service)
        }
        LOG.debug(data)
        response = self._dorequest(
            "/services/deploy", method="POST", data=json.dumps(data)
        )
        body = ''.join(response.readlines())
        response.close()
        return body

    def queryServiceInstances(self, serviceId):
        """
        Returns a sequence of ServiceInstance objects.
        """
        response = self._dorequest("/services/%s/running" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)


    def queryServiceStatus(self, serviceId):
        """
        Returns the status information for the given service.
        """
        response = self._dorequest("/services/%s/status" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        return ServiceStatusJsonDecoder().decode(body)

    def queryHosts(self):
        """
        Returns a sequence of Host objects.
        """
        response = self._dorequest("/hosts")
        body = ''.join(response.readlines())
        response.close()
        return HostJsonDecoder().decode(body)

    def getHost(self, hostId):
        """
        Returns the Host object for the given host id.
        """
        response = self._dorequest("/hosts/%s" % hostId)
        body = ''.join(response.readlines())
        response.close()
        return HostJsonDecoder().decode(body)

    def getInstance(self, serviceId, instanceId, default=None):
        """
        Returns the requested ServiceInstance object.
        """
        response = self._dorequest(
            "/services/%s/running/%s" % (serviceId, instanceId)
        )
        body = ''.join(response.readlines())
        response.close()
        return ServiceJsonDecoder().decode(body)

    def getServiceLog(self, serviceId, start=0, end=None):
        """
        """
        response = self._dorequest("/services/%s/logs" % serviceId)
        body = ''.join(response.readlines())
        response.close()
        log = json.loads(body)
        return log["Detail"]

    def getInstanceLog(self, serviceId, instanceId, start=0, end=None):
        """
        """
        response = self._dorequest(
            "/services/%s/%s/logs" % (serviceId, instanceId)
        )
        body = ''.join(response.readlines())
        response.close()
        log = json.loads(body)
        return str(log["Detail"])

    def killInstance(self, hostId, instanceId):
        """
        """
        response = self._dorequest(
            "/hosts/%s/%s" % (hostId, instanceId), method="DELETE"
        )
        response.close()

    def _makeRequest(self, uri, method=None, data=None, query=None):
        query = urllib.urlencode(query) if query else ""
        url = urlunparse(("https", self._netloc, uri, "", query, ""))
        args = {}
        if method:
            args["method"] = method
        if data:
            args["data"] = data
            args["headers"] = {"Content-Type": "application/json"}
        return _Request(url, **args)
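    # _Request is not shown in this excerpt. A plausible sketch of it, and a
    # common Python 2 idiom, is a urllib2.Request subclass that honors an
    # explicit HTTP method, since plain urllib2.Request only infers GET or
    # POST from the presence of data:
    #
    #     class _Request(urllib2.Request):
    #         def __init__(self, *args, **kwargs):
    #             self._method = kwargs.pop("method", None)
    #             urllib2.Request.__init__(self, *args, **kwargs)
    #
    #         def get_method(self):
    #             return self._method or urllib2.Request.get_method(self)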

    def _login(self):
        # Clear the cookie jar before logging in.
        self._cj.clear()
        encodedbody = json.dumps(self._creds)
        request = self._makeRequest("/login", data=encodedbody)
        response = self._opener.open(request)
        response.close()
        self._opener.close()

    def _dorequest(self, uri, method=None, data=None, query=None):
        request = self._makeRequest(
            uri, method=method, data=data, query=query)
        # Try to perform the request up to five times
        for trycount in range(5):
            try:
                return self._opener.open(request)
            except urllib2.HTTPError as ex:
                if ex.getcode() == 401:
                    self._login()
                    continue
                elif ex.getcode() == 500:
                    # Make the exception prettier and reraise it
                    try:
                        msg = json.load(ex)
                    except ValueError:
                        raise ex  # This stinks because we lose the stack
                    detail = msg.get('Detail')
                    if not detail:
                        raise
                    detail = detail.replace("Internal Server Error: ", "")
                    raise ControlCenterError(detail)
                raise
            else:
                # break the loop so we skip the loop's else clause
                break
        else:
            # raises the last exception that was raised (the 401 error)
            raise
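
# A brief usage sketch (host, port, and credentials are placeholders; a 401
# from any call makes _dorequest() log in again transparently and retry):
client = ControlPlaneClient("admin", "secret", host="cc.example.com", port=443)
for svc in client.queryServices(name="zope*", tags="daemon"):
    print "%s (%s)" % (svc.name, svc.id)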
Example n. 27
0
class Site(object):
    """Main point for which interaction with a MediaWiki
    API is made."""
    GITHUB = "https://github.com/ceradon/cerabot"
    USER_AGENT = "Cerabot/{0!r} (wikibot; Python/{1!r}; {2!r})"
    USER_AGENT = USER_AGENT.format("0.1", pyv(), GITHUB)
    config = {"throttle":10,
              "maxlag":10,
              "max_retries":3}

    def __init__(self, name=None, base_url="//en.wikipedia.org",
            project=None, lang=None, namespaces={}, login=(None, None),
            secure=False, config=None, user_agent=None, article_path=None,
            script_path="/w"):
        self._name = name
        if not project and not lang:
            self._base_url = base_url
            self._project = None
            self._lang = None
        else:
            self._lang = lang
            self._project = project
            self._base_url = "http://{0!r}.{1!r}".format(self._lang,
                    self._project)
        self._article_path = article_path
        self._script_path = script_path
        self._namespaces = namespaces
        if config:
            self._config = config
        else:
            self._config = self.config
        self._login_data = login
        self._secure = secure
        self._tokens = {}
        if user_agent:
            self._user_agent = user_agent
        else:
            self._user_agent = self.USER_AGENT

        self._throttle = self._config["throttle"]
        self._maxlag = self._config["maxlag"]
        self._max_retries = self._config["max_retries"]
        self._last_query_time = 0
        self.cookie_jar = CookieJar()
        self.api_lock = Lock()
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))
        self.opener.addheaders = [("User-Agent", self._user_agent),
                                  ("Accept-Encoding", "gzip")]
        if self._login_data[0] and self._login_data[1]:
            self.login(login)
        self._load()

    def urlencode(self, params):
        """Implement urllib.urlencode() with support for unicode input.
        Thanks to Earwig (Ben Kurtovic) for this code."""
        enc = lambda s: s.encode("utf8") if isinstance(s, unicode) else str(s)
        args = []
        for key, val in params.iteritems():
            key = quote_plus(enc(key))
            val = quote_plus(enc(val))
            args.append(key + "=" + val)
        return "&".join(args)

    def _query(self, params, query_continue=False, tries=0, idle=5, 
            non_stop=False, prefix=None):
        """Queries the site's API."""
        last_query = time.time() - self._last_query_time
        if last_query < self._throttle:
            throttle = self._throttle - last_query
            print "Throttling: waiting {0} seconds".format(round(throttle, 2))
            time.sleep(throttle)
        params.setdefault("maxlag", self._maxlag)
        params.setdefault("format", "json")
        params["continue"] = ""
        try:
            if type(prefix).__name__ in ["tuple", "list"]:
                for p in prefix:
                    params[p + "limit"] = "max"
            else:
                params[prefix + "limit"] = "max"
        except TypeError:
            pass
        protocol = "https:" if self._secure else "http:"
        url = ''.join((protocol, self._base_url, self._script_path, "/api.php"))
        data = self.urlencode(params)
        try:
            reply = self.opener.open(url, data)
        except URLError as e:
            if hasattr(e, "code"):
                exc = "API query could not be completed: Error code: {0}"
                exc = exc.format(e.code)
            elif hasattr(e, "reason"):
                exc = "API query could not be completed. Reason: {0}"
                exc = exc.format(e.reason)
            else:
                exc = "API query could not be completed."
            raise exceptions.APIError(exc)
        
        result = reply.read()
        if reply.headers.get("Content-Encoding") == "gzip":
            stream = StringIO(result)
            zipper = gzip.GzipFile(fileobj=stream)
            result = zipper.read()
        
        try:
            res = json.loads(result)
        except ValueError:
            e = "API query failed: JSON could not be loaded"
            raise exceptions.APIError(e)
        
        try:
            code = res["error"]["code"]
            info = res["error"]["info"]
        except (TypeError, ValueError, KeyError):
            if "continue" in res and query_continue:
                continue_data = self._handle_query_continue(params, res, 
                    max_continues=5 if not non_stop else "max")
                res.update(continue_data)
            return res
        
        if code == "maxlag":
            if tries >= self._max_retries:
                e = "Maximum amount of allowed retries has been exhausted."
                raise exceptions.APIError(e)
            tries += 1
            time.sleep(idle)
            return self._query(params, tries=tries, idle=idle*2)
        else:
            e = "An unknown error occured. Here is the data from the API: {0}"
            return_data = "({0}, {1})".format(code, info)
            error = exceptions.APIError(e.format(return_data))
            error.code, error.info = code, info
            raise error
    
    def _load(self, force=False):
        """Loads the sites attributes. Called automatically on initiation."""
        attrs = [self._name, self._project, self._lang, self._base_url,
                self._script_path, self._article_path]
        query = {"action":"query", "meta":"siteinfo", "siprop":"general"}

        if not self._namespaces or force:
            query["siprop"] += "|namespaces|namespacealiases"
            result = self._query(query)
            for item in result["query"]["namespaces"].values():
                ns_id = item["id"]
                name = item["*"]
                try:
                    canonical = item["canonical"]
                except KeyError:
                    self._namespaces[ns_id] = [name]
                else:
                    if name != canonical:
                        self._namespaces[ns_id] = [name, canonical]
                    else:
                        self._namespaces[ns_id] = [name]
            
            for item in result["query"]["namespacealiases"]:
                ns_id = item["id"]
                alias = item["*"]
                self._namespaces[ns_id].append(alias)
        elif all(attrs):
            return
        else:
            result = self.query(query)
        
        result = result["query"]["general"]
        self._name = result["wikiid"]
        self._project = result["sitename"].lower()
        self._lang = result["lang"]
        self._base_url = result["server"]
        self._script_path = result["scriptpath"]
        self._article_path = result["articlepath"]

    def _handle_query_continue(self, request, data, max_continues=5):
        """Handle \'query-continues\' in API queries."""
        all_data = {}
        count = 0
        last_continue = {}
        if max_continues == "max":
            # I solemnly doubt there will ever be this many continues
            max_continues = 10000
        while "continue" in data and count < max_continues:
            query = deepcopy(request)
            query.update(last_continue)
            res = self._query(query)
            if "continue" in res:
                last_continue = res["continue"]
            try:
                if not all_data:
                    all_data = res
                else:
                    all_data.update(res)
            except (KeyError, IndexError):
                pass
            count += 1
            data = res
        data.update(all_data)
        return data
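    # For reference, a MediaWiki continuation response looks roughly like
    # (values illustrative):
    #
    #     {"continue": {"cmcontinue": "page|...", "continue": "-||"},
    #      "query": {"categorymembers": [...]}}
    #
    # The loop above folds the "continue" object back into a copy of the
    # original request and repeats until the key disappears or the round
    # limit is reached.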

    def page(self, title="", pageid=0, follow_redirects=False):
        """Returns an instance of Page for *title* with *follow_redirects* 
        and *pageid* as arguments, unless *title* is a category, then 
        returns a Cateogry instance."""
        return Page(self, title, pageid, follow_redirects)

    def category(self, title="", pageid=0, follow_redirects=False):
        """Returns an instance of Category for *title* with *follow_redirects*
        and *pageid* as arguments."""
        return Category(self, title, pageid, follow_redirects)

    def user(self, name=None):
        """Returns an instance of User for *username*."""
        return User(name)

    def file(self, title, pageid=0, follow_redirects=False):
        """Returns an instance of File for *title* or *pageid*."""
        return File(title, pageid, follow_redirects)

    @property
    def domain(self):
        """Returns the site's web domain, like \"en.wikipedia.org\""""
        return urlparse(self._base_url).netloc

    def get_username(self):
        """Gets the name of the user that is currently logged into the site's API.
        Simple way to ensure that we are logged in."""
        data = self.query({"action":"query", "meta":"userinfo"})
        return data["query"]["userinfo"]["name"]

    def get_cookies(self, name, domain):
        for cookie in self.cookie_jar:
            if cookie.name == name and cookie.domain == domain:
                if cookie.is_expired():
                    break
                return cookie

    def save_cookie_jar(self):
        """Attempts to save all changes to our cookiejar after a 
        successful login or logout."""
        if hasattr(self.cookie_jar, "save"):
            try:
                self.cookie_jar.save()
            except (NotImplementedError, ValueError):
                pass

    def query(self, params, query_continue=False, non_stop=False, 
            prefix=None):
        """Queries the site's API."""
        with self.api_lock:
            i = self._query(params, query_continue, non_stop=non_stop,
                prefix=prefix)
        return i

    def _login(self, login, token=None, attempts=0):
        """Logs into the site's API."""
        username, password = login
        if token:
            i = self.query({"action":"login", "lgname":username, 
                            "lgpassword":password, "lgtoken":token})
        else:
            i = self.query({"action":"login", 
                            "lgname":username, 
                            "lgpassword":password})

        res = i["login"]["result"]
        if res == "Success":
            self.save_cookie_jar()
        elif res == "NeedToken" and attempts == 0:
            token = i["login"]["token"]
            return self._login(login, token, attempts=1)
        else:
            if res == "Illegal":
                e = "The provided username is illegal."
            elif res == "NotExists":
                e = "The provided username does not exist."
            elif res == "EmptyPass":
                e = "No password was given."
            elif res == "WrongPass" or res == "WrongPluginPass":
                e = "The given password is incorrect."
            else:
                e = "An unknown error occured, API responded with {0)."
                e = e.format(res)
            raise exceptions.APILoginError(e)

    def login(self, login):
        """Public method for logging in to the API."""
        if not login:
            if self._login_data[0]:
                login = self._login_data
            else:
                e = "No login data or insufficient data provided."
                raise exceptions.APILoginError(e)
        if type(login) == tuple:
            self._login(login)
        else:
            e = "Login data must be in tuple format, got {0}"
            raise exceptions.APILoginError(e.format(type(login)))

    def logout(self):
        """Attempts to logout out the API and clear the cookie jar."""
        self.query({"action":"logout"})
        self.cookie_jar.clear()
        self.save_cookie_jar()

    def tokener(self, args=None):
        i = re.compile("Action (.*?) is not allowed for the current user")
        valid_args = ["block", "delete", "edit", "email", "import", "move",
                      "options", "patrol", "protect", "unblock", "watch"]
        if not args:
            args = valid_args
        args = list(args)  # work on a copy; the code below mutates it
        if self._tokens:
            m = {}
            for token in args:
                try:
                    m[token] = self._tokens[token]
                except (KeyError, IndexError):
                    m[token] = None
                    continue
            return m

        if not type(args) == list:
            return
        query = {"action":"query", "prop":"info", "titles":"Main Page",
                 "intoken":"|".join(args)}
        result = self.query(query)
        res = result["query"]["pages"]
        _tokens = {}
        c = res.keys()[0]
        possible_tokens = res[c]
        for key, val in possible_tokens.items():
            if key.endswith("token"):
                name = key[:key.find("token")]
                _tokens[name] = val
                args.pop(args.index(name))
        
        if "warnings" in result:
            a = result["warnings"]["info"]["*"].split("\n")
            if len(a) > 1:
                a = [b for b in a if b.lower().startswith("action")]
            for item in a:
                name = i.findall(item)
                name = name[0].strip("'")
                _tokens[name] = None
        self._tokens.update(_tokens)
        return _tokens
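        # A hypothetical example of fetching two tokens via the
        # pre-MediaWiki-1.24 "intoken" parameter this method targets;
        # tokens the user may not request come back as None:
        #
        # >>> site.tokener(["edit", "move"])
        # {'edit': 'abc123+\\', 'move': 'abc123+\\'}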

    def iterator(self, **kwargs):
        """Iterates over result of api query with *kwargs* as arguments
        and returns a generator."""
        result = None
        if "action" in kwargs.keys():
            kwargs.pop("action", 0)
        kwargs["action"] = "query"
        res = self.query(kwargs)
        if "warnings" in res:
            e = "Unknown error occured while attempting iterator query."
            e += " Got back: {0}".format(res)
            raise exceptions.APIError(e)
        if len(res["query"]) > 1:
            result = {}
            a = {}
            b = list(res["query"])
            for key, val in res["query"].items():
                a[key] = val
            while len(b) > 0:
                key = b.pop(0)
                if not key:
                    break
                result[key] = itertools.chain(a[key])
        elif len(res["query"]) == 1:
            result = (i for i in res["query"][list(res["query"])[0]])
        return result 

    def name_to_id(self, name):
        """Returns the associated id to the namespace *name*."""
        for ns_id, names in self._namespaces.items():
            if name.lower() in [i.lower() for i in names]:
                return ns_id

        error = "No such namespace with name {0}."
        raise exceptions.APIError(error)

    def id_to_name(self, ns_id, get_all=False):
        """Returns the associated name to the namespace id *ns_id*."""
        try:
            if get_all:
                return self._namespaces[ns_id]
            else:
                return self._namespaces[ns_id][0]
        except KeyError:
            error = "No such id with namespace {0}."
            raise exceptions.APIError(error)

    def __repr__(self):
        """Returns a canonical string representation of Site."""
        res = (u"Site(name={0}, base_url={1}, project={2}, lang={3}, "
               u"namespaces={4}, secure={5}, config={6}, article_path={7}, "
               u"script_path={8}, user_agent={9}").format(self._name,
            self._base_url, self._project, self._lang, self._namespaces,
            self._secure, unicode(self._config), self._article_path,
            self._script_path, self._user_agent)
        if self._login_data[0] and self._login_data[1]:
            res = res + ", username={0}, password=<hidden>".format(
                self._login_data[0])
        return res

    def __str__(self):
        """Returns a prettier string representation of Site."""
        res = u"<Site(site object %s (%s, %s) for site %s"+ \
            " with user %s, config %s and user agent %s."
        if self._login_data[0]:
            res = res % (self._name, self._lang, self._project, self._base_url, 
                self._login_data[0], unicode(self._config), self._user_agent)
            return res.replace("'", "")
        res = res.replace("user %s, ", "")
        res = res % (self._name, self._lang, self._project, self._base_url,
            unicode(self._config), self._user_agent)
        return res.replace("'", "")
Example n. 28
0
class GHttp():
    def __init__(self):
        """
        class initialisation, creates cookie jar and headers
        """

        self.lastpage = None
        self.lasterror = None
        
        self.cj = CookieJar()

        self.cookieH = urllib2.HTTPCookieProcessor(self.cj)
        self.redirectH = urllib2.HTTPRedirectHandler()
        self.proxyH = None

        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)

    def addproxy(self, proxyipport):
        """
        Sets an http proxy ('ip:port') and tests it with a request to
        google.com; returns True if the proxy works, False otherwise
        """
        self.proxyH = urllib2.ProxyHandler({'http':proxyipport})
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH, self.proxyH)
        if self.rq('http://google.com') is None:
            return False
        return True

    def removeproxy(self):
        """
        Removes the currently set proxy
        """
        self.proxyH = None
        self.opener = urllib2.build_opener(self.cookieH, self.redirectH)
        
    def clearcookies(self):
        """
        clears all cookies from the cookie jar :)
        """
        self.cj.clear()
    
    def rq(self, url, ref=None, data=None):
        """
        Http request, it either returns response html or
        none if there's an error.

        Keyword arguments:
        url -- the url you want to request
        data -- this is for the POST method, data that you will be seding
        ref -- the referer to your request page, if none specified it will
               use last page's url or the current url (default None)
        """
        # reset lasterror
        self.lasterror = None

        # set the referrer
        if ref is None:
            if self.lastpage is None:
                self.lastpage = url
            ref = self.lastpage

        self.opener.addheaders = [('Referer', ref),
        ('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.98 Safari/534.13')]

        # if data is a dictionary type we will use urllibe to encode it
        # to url format
        if isinstance(data, dict):
            data = urllib.urlencode(data)
            
        # catch exceptions so program does not crash
        try:
            if data is not None:
                opnr = self.opener.open(url, data=data)
            else:
                opnr = self.opener.open(url)
        except urllib2.HTTPError as e:
            self.lasterror = 'The server couldn\'t fulfill the request. ' + \
                             'Error code: %s' % e.code
            return None
        except urllib2.URLError as e:
            self.lasterror = 'We failed to reach a server. Reason: %s' % e.reason
            return None

        # success: remember this page as the referer for the next request
        # and hand back the response html
        self.lastpage = url
        return opnr.read()
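
# A short usage sketch (URL, proxy address, and form fields are placeholders):
g = GHttp()
g.addproxy('127.0.0.1:8080')
html = g.rq('http://example.com/login', data={'user': 'u', 'pass': 'p'})
if html is None:
    print g.lasterror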
Example n. 29
0
class Bugz:
	""" Converts sane method calls to Bugzilla HTTP requests.

	@ivar base: base url of bugzilla.
	@ivar user: username for authenticated operations.
	@ivar password: password for authenticated operations
	@ivar cookiejar: for authenticated sessions so we only auth once.
	@ivar forget: forget user/password after session.
	@ivar authenticated: is this session authenticated already
	"""

	def __init__(self, base, user = None, password = None, forget = False,
			skip_auth = False, httpuser = None, httppassword = None ):
		"""
		{user} and {password} will be prompted if an action needs them
		and they are not supplied.

		if {forget} is set, the login cookie will be destroyed on quit.

		@param base: base url of the bugzilla
		@type  base: string
		@keyword user: username for authenticated actions.
		@type    user: string
		@keyword password: password for authenticated actions.
		@type    password: string
		@keyword forget: forget login session after termination.
		@type    forget: bool
		@keyword skip_auth: do not authenticate
		@type    skip_auth: bool
		"""
		self.base = base
		scheme, self.host, self.path, query, frag  = urlsplit(self.base)
		self.authenticated = False
		self.forget = forget

		if not self.forget:
			try:
				cookie_file = os.path.join(os.environ['HOME'], COOKIE_FILE)
				self.cookiejar = LWPCookieJar(cookie_file)
				if forget:
					try:
						self.cookiejar.load()
						self.cookiejar.clear()
						self.cookiejar.save()
						os.chmod(self.cookiejar.filename, 0700)
					except IOError:
						pass
			except KeyError:
				self.warn('Unable to save session cookies in %s'
						% os.path.join('~', COOKIE_FILE))
				self.cookiejar = CookieJar()
		else:
			self.cookiejar = CookieJar()

		self.opener = build_opener(HTTPCookieProcessor(self.cookiejar))
		self.user = user
		self.password = password
		self.httpuser = httpuser
		self.httppassword = httppassword
		self.skip_auth = skip_auth

	def log(self, status_msg):
		"""Default logging handler. Expected to be overridden by
		the UI implementing subclass.

		@param status_msg: status message to print
		@type  status_msg: string
		"""
		return

	def warn(self, warn_msg):
		"""Default logging handler. Expected to be overridden by
		the UI implementing subclass.

		@param warn_msg: warning message to print
		@type  warn_msg: string
		"""
		return

	def get_input(self, prompt):
		"""Default input handler. Expected to be override by the
		UI implementing subclass.

		@param prompt: Prompt message
		@type  prompt: string
		"""
		return ''

	def auth(self):
		"""Authenticate a session.
		"""
		# check if we need to authenticate
		if self.authenticated:
			return

		# try seeing if we really need to request login
		if not self.forget:
			try:
				self.cookiejar.load()
			except IOError:
				pass

		req_url = urljoin(self.base, config.urls['auth'])
		req_url += '?GoAheadAndLogIn=1'
		req = Request(req_url, None, config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)
		re_request_login = re.compile(r'<title>.*Log in to Bugzilla</title>')
		if not re_request_login.search(resp.read()):
			self.log('Already logged in.')
			self.authenticated = True
			return

		# prompt for username if we were not supplied with it
		if not self.user:
			self.log('No username given.')
			self.user = self.get_input('Username: ')

		# prompt for password if we were not supplied with it
		if not self.password:
			self.log('No password given.')
			self.password = getpass.getpass()

		# perform login
		qparams = config.params['auth'].copy()
		qparams['Bugzilla_login'] = self.user
		qparams['Bugzilla_password'] = self.password
		if not self.forget:
			qparams['Bugzilla_remember'] = 'on'

		req_url = urljoin(self.base, config.urls['auth'])
		req = Request(req_url, urlencode(qparams), config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)
		if resp.info().has_key('Set-Cookie'):
			self.authenticated = True
			if not self.forget:
				self.cookiejar.save()
				os.chmod(self.cookiejar.filename, 0700)
			return True
		else:
			raise RuntimeError("Failed to login")

	def extractResults(self, resp):
		# parse the results into dicts.
		results = []
		columns = []
		rows = []

		for r in csv.reader(resp): rows.append(r)
		for field in rows[0]:
			if config.choices['column_alias'].has_key(field):
				columns.append(config.choices['column_alias'][field])
			else:
				self.log('Unknown field: ' + field)
				columns.append(field)
		for row in rows[1:]:
			if row[0].find("Missing Search") != -1:
				self.log('Bugzilla error (Missing search found)')
				return None
			fields = {}
			for i in range(min(len(row), len(columns))):
				fields[columns[i]] = row[i]
			results.append(fields)
		return results

	def search(self, query, comments = False, order = 'number',
			assigned_to = None, reporter = None, cc = None,
			commenter = None, whiteboard = None, keywords = None,
			status = [], severity = [], priority = [], product = [],
			component = []):
		"""Search bugzilla for a bug.

		@param query: query string to search in title or {comments}.
		@type  query: string
		@param order: what order to returns bugs in.
		@type  order: string

		@keyword assigned_to: email address which the bug is assigned to.
		@type    assigned_to: string
		@keyword reporter: email address matching the bug reporter.
		@type    reporter: string
		@keyword cc: email that is contained in the CC list
		@type    cc: string
		@keyword commenter: email of a commenter.
		@type    commenter: string

		@keyword whiteboard: string to search in status whiteboard (gentoo?)
		@type    whiteboard: string
		@keyword keywords: keyword to search for
		@type    keywords: string

		@keyword status: bug status to match. default is ['NEW', 'ASSIGNED',
						 'REOPENED'].
		@type    status: list
		@keyword severity: severity to match, empty means all.
		@type    severity: list
		@keyword priority: priority levels to patch, empty means all.
		@type    priority: list
		@keyword comments: search comments instead of just bug title.
		@type    comments: bool
		@keyword product: search within products. empty means all.
		@type    product: list
		@keyword component: search within components. empty means all.
		@type    component: list

		@return: list of bugs, each bug represented as a dict
		@rtype: list of dicts
		"""

		if not self.authenticated and not self.skip_auth:
			self.auth()

		qparams = config.params['list'].copy()
		if comments:
			qparams['long_desc'] = query
		else:
			qparams['short_desc'] = query

		qparams['order'] = config.choices['order'].get(order, 'Bug Number')
		qparams['bug_severity'] = severity or []
		qparams['priority'] = priority or []
		if not status:
			qparams['bug_status'] = ['NEW', 'ASSIGNED', 'REOPENED']
		elif [s.upper() for s in status] == ['ALL']:
			qparams['bug_status'] = config.choices['status']
		else:
			qparams['bug_status'] = [s.upper() for s in status]
		qparams['product'] = product or ''
		qparams['component'] = component or ''
		qparams['status_whiteboard'] = whiteboard or ''
		qparams['keywords'] = keywords or ''

		# hoops to jump through for emails, since there are
		# only two fields, we have to figure out what combinations
		# to use if all three are set.
		unique = list(set([assigned_to, cc, reporter, commenter]))
		unique = [u for u in unique if u]
		if len(unique) < 3:
			for i in range(len(unique)):
				e = unique[i]
				n = i + 1
				qparams['email%d' % n] = e
				qparams['emailassigned_to%d' % n] = int(e == assigned_to)
				qparams['emailreporter%d' % n] = int(e == reporter)
				qparams['emailcc%d' % n] = int(e == cc)
				qparams['emaillongdesc%d' % n] = int(e == commenter)
		else:
			raise AssertionError('Cannot set assigned_to, cc, and '
					'reporter in the same query')

		req_params = urlencode(qparams, True)
		req_url = urljoin(self.base, config.urls['list'])
		req_url += '?' + req_params
		req = Request(req_url, None, config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)
		return self.extractResults(resp)

	def namedcmd(self, cmd):
		"""Run command stored in Bugzilla by name.

		@return: Result from the stored command.
		@rtype: list of dicts
		"""

		if not self.authenticated and not self.skip_auth:
			self.auth()

		qparams = config.params['namedcmd'].copy()
		# Is there a better way of getting a command with a space in its name
		# to be encoded as foo%20bar instead of foo+bar or foo%2520bar?
		qparams['namedcmd'] = quote(cmd)
		req_params = urlencode(qparams, True)
		req_params = req_params.replace('%25','%')

		req_url = urljoin(self.base, config.urls['list'])
		req_url += '?' + req_params
		req = Request(req_url, None, config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)

		return self.extractResults(resp)

	def get(self, bugid):
		"""Get an ElementTree representation of a bug.

		@param bugid: bug id
		@type  bugid: int

		@rtype: ElementTree
		"""
		if not self.authenticated and not self.skip_auth:
			self.auth()

		qparams = config.params['show'].copy()
		qparams['id'] = bugid

		req_params = urlencode(qparams, True)
		req_url = urljoin(self.base,  config.urls['show'])
		req_url += '?' + req_params
		req = Request(req_url, None, config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)

		fd = StringIO(resp.read())
		# workaround for ill-defined XML templates in bugzilla 2.20.2
		parser = ForcedEncodingXMLTreeBuilder(encoding = 'utf-8')
		etree = ElementTree.parse(fd, parser)
		bug = etree.find('.//bug')
		if bug and bug.attrib.has_key('error'):
			return None
		else:
			return etree

	def modify(self, bugid, title = None, comment = None, url = None,
			status = None, resolution = None,
			assigned_to = None, duplicate = 0,
			priority = None, severity = None,
			add_cc = [], remove_cc = [],
			add_dependson = [], remove_dependson = [],
			add_blocked = [], remove_blocked = [],
			whiteboard = None, keywords = None):
		"""Modify an existing bug

		@param bugid: bug id
		@type  bugid: int
		@keyword title: new title for bug
		@type    title: string
		@keyword comment: comment to add
		@type    comment: string
		@keyword url: new url
		@type    url: string
		@keyword status: new status (note, if you are changing it to RESOLVED, you need to set {resolution} as well.
		@type    status: string
		@keyword resolution: new resolution (if status=RESOLVED)
		@type    resolution: string
		@keyword assigned_to: email (needs to exist in bugzilla)
		@type    assigned_to: string
		@keyword duplicate: bug id to duplicate against (if resolution = DUPLICATE)
		@type    duplicate: int
		@keyword priority: new priority for bug
		@type    priority: string
		@keyword severity: new severity for bug
		@type    severity: string
		@keyword add_cc: list of emails to add to the cc list
		@type    add_cc: list of strings
		@keyword remove_cc: list of emails to remove from cc list
		@type    remove_cc: list of string.
		@keyword add_dependson: list of bug ids to add to the depend list
		@type    add_dependson: list of strings
		@keyword remove_dependson: list of bug ids to remove from depend list
		@type    remove_dependson: list of strings
		@keyword add_blocked: list of bug ids to add to the blocked list
		@type    add_blocked: list of strings
		@keyword remove_blocked: list of bug ids to remove from blocked list
		@type    remove_blocked: list of strings

		@keyword whiteboard: set status whiteboard
		@type    whiteboard: string
		@keyword keywords: set keywords
		@type    keywords: string

		@return: list of fields modified.
		@rtype: list of strings
		"""
		if not self.authenticated and not self.skip_auth:
			self.auth()


		buginfo = Bugz.get(self, bugid)
		if not buginfo:
			return False

		modified = []
		qparams = config.params['modify'].copy()
		qparams['id'] = bugid
		qparams['knob'] = 'none'

		# copy existing fields
		FIELDS = ('bug_file_loc', 'bug_severity', 'short_desc', 'bug_status',
				'status_whiteboard', 'keywords',
				'op_sys', 'priority', 'version', 'target_milestone',
				'assigned_to', 'rep_platform', 'product', 'component')

		FIELDS_MULTI = ('blocked', 'dependson')

		for field in FIELDS:
			try:
				qparams[field] = buginfo.find('.//%s' % field).text
			except:
				pass

		for field in FIELDS_MULTI:
			qparams[field] = [d.text for d in buginfo.findall('.//%s' % field)]

		# set 'knob' if we are changing the status/resolution
		# or trying to reassign bug.
		if status:
			status = status.upper()
		if resolution:
			resolution = resolution.upper()

		if status == 'RESOLVED' and status != qparams['bug_status']:
			qparams['knob'] = 'resolve'
			if resolution:
				qparams['resolution'] = resolution
			else:
				qparams['resolution'] = 'FIXED'

			modified.append(('status', status))
			modified.append(('resolution', qparams['resolution']))
		elif status == 'ASSIGNED' and status != qparams['bug_status']:
			qparams['knob'] = 'accept'
			modified.append(('status', status))
		elif status == 'REOPENED' and status != qparams['bug_status']:
			qparams['knob'] = 'reopen'
			modified.append(('status', status))
		elif status == 'VERIFIED' and status != qparams['bug_status']:
			qparams['knob'] = 'verified'
			modified.append(('status', status))
		elif status == 'CLOSED' and status != qparams['bug_status']:
			qparams['knob'] = 'closed'
			modified.append(('status', status))
		elif duplicate:
			qparams['knob'] = 'duplicate'
			qparams['dup_id'] = duplicate
			modified.append(('status', 'RESOLVED'))
			modified.append(('resolution', 'DUPLICATE'))
		elif assigned_to:
			qparams['knob'] = 'reassign'
			qparams['assigned_to'] = assigned_to
			modified.append(('assigned_to', assigned_to))

		# setup modification of other bits
		if comment:
			qparams['comment'] = comment
			modified.append(('comment', ellipsis(comment, 60)))
		if title:
			qparams['short_desc'] = title or ''
			modified.append(('title', title))
		if url != None:
			qparams['bug_file_loc'] = url
			modified.append(('url', url))
		if severity != None:
			qparams['bug_severity'] = severity
			modified.append(('severity', severity))
		if priority != None:
			qparams['priority'] = priority
			modified.append(('priority', priority))

		# cc manipulation
		if add_cc != None:
			qparams['newcc'] = ', '.join(add_cc)
			modified.append(('newcc', qparams['newcc']))
		if remove_cc != None:
			qparams['cc'] = remove_cc
			qparams['removecc'] = 'on'
			modified.append(('cc', remove_cc))

		# bug depend/blocked manipulation
		changed_dependson = False
		changed_blocked = False
		if remove_dependson:
			for bug_id in remove_dependson:
				qparams['dependson'].remove(str(bug_id))
				changed_dependson = True
		if remove_blocked:
			for bug_id in remove_blocked:
				qparams['blocked'].remove(str(bug_id))
				changed_blocked = True
		if add_dependson:
			for bug_id in add_dependson:
				qparams['dependson'].append(str(bug_id))
				changed_dependson = True
		if add_blocked:
			for bug_id in add_blocked:
				qparams['blocked'].append(str(bug_id))
				changed_blocked = True

		qparams['dependson'] = ','.join(qparams['dependson'])
		qparams['blocked'] = ','.join(qparams['blocked'])
		if changed_dependson:
			modified.append(('dependson', qparams['dependson']))
		if changed_blocked:
			modified.append(('blocked', qparams['blocked']))

		if whiteboard != None:
			qparams['status_whiteboard'] = whiteboard
			modified.append(('status_whiteboard', whiteboard))
		if keywords != None:
			qparams['keywords'] = keywords
			modified.append(('keywords', keywords))

		req_params = urlencode(qparams, True)
		req_url = urljoin(self.base, config.urls['modify'])
		req = Request(req_url, req_params, config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)

		try:
			resp = self.opener.open(req)
			return modified
		except:
			return []

	def attachment(self, attachid):
		"""Get an attachment by attachment_id

		@param attachid: attachment id
		@type  attachid: int

		@return: dict with three keys, 'filename', 'size', 'fd'
		@rtype: dict
		"""
		if not self.authenticated and not self.skip_auth:
			self.auth()

		qparams = config.params['attach'].copy()
		qparams['id'] = attachid

		req_params = urlencode(qparams, True)
		req_url = urljoin(self.base, config.urls['attach'])
		req_url += '?' + req_params
		req = Request(req_url, None, config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)

		try:
			content_type = resp.info()['Content-type']
			namefield = content_type.split(';')[1]
			filename = re.search(r'name=\"(.*)\"', namefield).group(1)
			content_length = int(resp.info()['Content-length'], 0)
			return {'filename': filename, 'size': content_length, 'fd': resp}
		except:
			return {}

	def post(self, product, component, title, description, url = '', assigned_to = '', cc = '', keywords = '', version = '', dependson = '', blocked = '', priority = '', severity = ''):
		"""Post a bug

		@param product: product where the bug should be placed
		@type product: string
		@param component: component where the bug should be placed
		@type component: string
		@param title: title of the bug.
		@type  title: string
		@param description: description of the bug
		@type  description: string
		@keyword url: optional url to submit with bug
		@type url: string
		@keyword assigned_to: optional email to assign bug to
		@type assigned_to: string.
		@keyword cc: optional list of CC'd emails
		@type    cc: string
		@keyword keywords: optional list of bugzilla keywords
		@type    keywords: string
		@keyword version: version of the component
		@type    version: string
		@keyword dependson: bugs this one depends on
		@type    dependson: string
		@keyword blocked: bugs this one blocks
		@type    blocked: string
		@keyword priority: priority of this bug
		@type    priority: string
		@keyword severity: severity of this bug
		@type    severity: string

		@rtype: int
		@return: the bug number, or 0 if submission failed.
		"""
		if not self.authenticated and not self.skip_auth:
			self.auth()

		qparams = config.params['post'].copy()
		qparams['product'] = product
		qparams['component'] = component
		qparams['short_desc'] = title
		qparams['comment'] = description
		qparams['assigned_to']  = assigned_to
		qparams['cc'] = cc
		qparams['bug_file_loc'] = url
		qparams['dependson'] = dependson
		qparams['blocked'] = blocked
		qparams['keywords'] = keywords

		#XXX: default version is 'unspecified'
		if version != '':
			qparams['version'] = version

		#XXX: default priority is 'P2'
		if priority != '':
			qparams['priority'] = priority

		#XXX: default severity is 'normal'
		if severity != '':
			qparams['bug_severity'] = severity

		req_params = urlencode(qparams, True)
		req_url = urljoin(self.base, config.urls['post'])
		req = Request(req_url, req_params, config.headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)

		try:
			re_bug = re.compile(r'<title>.*Bug ([0-9]+) Submitted</title>')
			bug_match = re_bug.search(resp.read())
			if bug_match:
				return int(bug_match.group(1))
		except:
			pass

		return 0

	def attach(self, bugid, title, description, filename,
			content_type = 'text/plain'):
		"""Attach a file to a bug.

		@param bugid: bug id
		@type  bugid: int
		@param title: short description of attachment
		@type  title: string
		@param description: long description of the attachment
		@type  description: string
		@param filename: filename of the attachment
		@type  filename: string
		@keyword content_type: mime-type of the attachment
		@type content_type: string

		@rtype: bool
		@return: True if successful, False if not successful.
		"""
		if not self.authenticated and not self.skip_auth:
			self.auth()

		qparams = config.params['attach_post'].copy()
		qparams['bugid'] = bugid
		qparams['description'] = title
		qparams['comment'] = description
		qparams['contenttypeentry'] = content_type

		filedata = [('data', filename, open(filename).read())]
		content_type, body = encode_multipart_formdata(qparams.items(),
				filedata)

		req_headers = config.headers.copy()
		req_headers['Content-type'] = content_type
		req_headers['Content-length'] = len(body)
		req_url = urljoin(self.base, config.urls['attach_post'])
		req = Request(req_url, body, req_headers)
		if self.httpuser and self.httppassword:
			base64string = base64.encodestring('%s:%s' % (self.httpuser, self.httppassword))[:-1]
			req.add_header("Authorization", "Basic %s" % base64string)
		resp = self.opener.open(req)

		# TODO: return attachment id and success?
		try:
			re_success = re.compile(r'<title>Changes Submitted</title>')
			if re_success.search(resp.read()):
				return True
		except:
			pass

		return False
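
# A sketch of driving the class above (URL and credentials are placeholders;
# keys of each result dict come from config.choices['column_alias']):
bugz = Bugz('https://bugs.example.org/', user='me@example.org',
		password='secret')
for bug in bugz.search('cookie jar', status=['ALL']):
	print bug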
Example n. 30
0
class Connection:

    ENCODING = 'gb18030'
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
    BBS_URL = 'http://bbs.nju.edu.cn/'
    DATE_FORMAT = '%b %d %H:%M'
    LINE_WIDTH = 40
    base_url = 'http://bbs.nju.edu.cn/'

    def __init__(self, session=None):
        self._cj = CookieJar()
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cj))
        self._opener.addheaders = [('User-Agent', self.USER_AGENT)]
        self._opener.addheaders = [('Referer', self.BBS_URL)]
        if session:
            self.load_session(session)

    def _do_action(self, action, params=None, data=None):
        args = []
        if params:
            for k, v in params.items():
                if isinstance(v, list):
                    args += ['{0}={1}'.format(k, i) for i in v]
                else:
                    args.append('{0}={1}'.format(k, v))
        url = self.base_url + action + ('?' if args else '') + '&'.join(args)
        logger.debug(url)
        body = []
        if data:
            for k, v in data.items():
                body.append('{0}={1}'.format(
                    quote(k), quote(unicode(v).encode(self.ENCODING))))
        try:
            response = self._opener.open(url, '&'.join(body) if data else None)
        except URLError:
            raise NetworkError()
        # decode() in py2.6 does not support `errors` kwarg.
        html = response.read().decode(self.ENCODING, 'ignore')
        # TODO: BeautifulSoup still needs this?
        html = html.replace(u'<nobr>', u'')  # damn it
        return html

    def load_session(self, session):
        from utils import make_cookie
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)
        self._cj.set_cookie(make_cookie('_U_KEY', session.key))
        self._cj.set_cookie(make_cookie('_U_UID', session.uid))
        self._cj.set_cookie(make_cookie('_U_NUM', session.num))

    def is_logged_in(self):
        html = self._do_action('bbsfoot')
        return html.find('bbsqry?userid=guest') == -1

    def login(self, username, password):
        '''
        return Session if successful else None
        '''
        from random import randint
        session = Session()
        session.vd = str(randint(10000, 100000))
        self.base_url = '{0}vd{1}/'.format(self.BBS_URL, session.vd)

        params = {'type': 2}
        data = {u'id': username, u'pw': password}
        html = self._do_action('bbslogin', params, data)
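
        # For illustration (made-up values): the response embeds something
        # like setCookie('123N456+789'); the code below takes the last
        # '+'-separated field minus 2 as the key, and splits the first
        # field on 'N' into num (first part plus 2) and uid.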

        try:
            s = re.search(r"setCookie\('(.*)'\)", html).group(1)
        except AttributeError:
            return None

        s = s.split('+')
        session.key = str(int(s[-1]) - 2)
        s = s[0].split('N')
        session.uid = s[-1]
        session.num = str(int(s[0]) + 2)
        self.load_session(session)
        return session

    def logout(self, session=None):
        if session:
            self.load_session(session)
        data = {'Submit': u'注销登录'.encode(self.ENCODING)}
        self._do_action('bbslogout', '', data)
        self._cj.clear()
        self.base_url = self.BBS_URL

    def compose(self, board, title, body, pid=None, gid=None, signature=0):
        '''
        XXX: unicode
        '''
        params = {'board': board}
        lines = body.split(u'\r\n')
        body = []
        for i in lines:
            body.append(u'\r\n'.join(wrap(i, self.LINE_WIDTH)))
        body = u'\r\n'.join(body)
        data = {'title': title, 'text': body}
        if pid is not None:
            data['reid'] = pid
            data['pid'] = gid
        data['signature'] = signature
        html = self._do_action('bbssnd', params, data)
        return 'Refresh' in html

    def fetch_post(self, board, pid, num):
        params = {'board': board, 'file': pid2str(pid), 'num': num}
        html = self._do_action('bbscon', params)
        soup = BeautifulSoup(html)
        txt = soup.find('textarea').text
        ret = Post(board, pid, num)
        ret.parse_post(txt)
        # TODO: works for 'x' post
        s = soup.findAll('a')[-1]['href']
        gid = parse_qs(urlparse(s).query).get('gid', None)
        if gid is not None:
            ret.gid = gid[0]
        else:
            ret.gid = None
        return ret

    def fetch_topic(self, board, pid, start=None):
        params = {'board': board, 'file': pid2str(pid)}
        if start:
            params['start'] = start
        html = self._do_action('bbstcon', params)
        soup = BeautifulSoup(html)
        ret = Topic(board, pid)
        items = soup.findAll('table', {'class': 'main'})
        if not items:
            raise ContentError()
        for i in items:
            c = i.tr.td.a['href']
            p = Post(board, parse_pid(c), parse_num(c))
            c = i.findAll('tr')[1].td.textarea.text
            p.parse_post(c)
            ret.post_list.append(p)
        for i in soup.body.center.findAll('a', recursive=False, limit=3):
            if i.text == u'本主题下30篇':
                ret.next_start = int(parse_href(i['href'], 'start'))
        return ret

    def fetch_page(self, board, start=None):
        params = {'board': board}
        if start:
            params['start'] = start
        html = self._do_action('bbstdoc', params)
        soup = BeautifulSoup(html)

        items = soup.findAll('tr')[1:]
        year = datetime.now().year
        ret = Page(board)
        for i in items:
            cells = i.findAll('td')
            h = Header()
            h.board = board
            try:
                h.num = int(cells[0].text) - 1
            except ValueError:
                continue
            h.author = cells[2].text.strip()
            h.date = cells[3].text.strip()
            h.date = datetime.strptime(h.date, self.DATE_FORMAT)
            h.date = h.date.replace(year=year)
            h.title = cells[4].text.strip()[2:]
            h.pid = parse_pid(cells[4].a['href'])
            tmp = cells[5].text.strip()
            if tmp.find('/') != -1:
                tmp = tmp.split('/')
                h.reply_count = int(tmp[0])
                h.view_count = int(tmp[1])
            else:
                h.view_count = int(tmp)
            ret.header_list.append(h)
        # TODO
        for i in soup.body.center.findAll('a', recursive=False):
            if i.text == u'上一页':
                ret.prev_start = int(parse_href(i['href'], 'start')) - 1
        return ret

    def fetch_top10(self):
        html = self._do_action('bbstop10')
        soup = BeautifulSoup(html)
        items = soup.findAll('tr')[1:]
        ret = Page(u'全站十大')
        for i in items:
            cells = i.findAll('td')
            h = Header()
            h.board = cells[1].text.strip()
            h.title = cells[2].text.strip()
            h.pid = parse_pid(cells[2].a['href'])
            h.author = cells[3].text.strip()
            h.reply_count = int(cells[4].text.strip())
            ret.header_list.append(h)
        return ret

    def fetch_hot(self):
        html = self._do_action('bbstopall')
        soup = BeautifulSoup(html)
        items = soup.findAll('tr')
        ret = []
        tmp = None
        for i in items:
            if i.img:
                tmp = []
                continue
            cells = i.findAll('td')
            if not cells[0].text:
                ret.append(tmp)
                continue
            for j in cells:
                h = Header()
                links = j.findAll('a')
                h.title = links[0].text.strip()
                h.board = links[1].text.strip()
                h.pid = parse_pid(links[0]['href'])
                tmp.append(h)
        return ret

    def fetch_favorites(self):
        html = self._do_action('bbsleft')
        soup = BeautifulSoup(html)
        div = soup.findAll('div', {'id': 'div0'})
        if not div:
            raise Error()
        items = div[0]
        items = items.findAll('a')[:-1]
        ret = [i.text for i in items]
        return ret

    def fetch_board_list(self):
        from time import sleep
        ret = BoardManager()
        for i in range(12):
            sleep(1)
            html = self._do_action('bbsboa', {'sec': i})
            soup = BeautifulSoup(html)
            try:
                text = re.search(ur'\[(\w+?)区\]<hr', html, re.UNICODE).group(1)
            except AttributeError:
                raise ContentError(u'请勿过快刷新页面')
            section = Section(i, text)
            items = soup.findAll('tr')[1:]
            for i in items:
                cells = i.findAll('td')
                s = cells[5].text[2:]
                # Some board may have a voting in progress
                if s.endswith(u'V'):
                    s = s[:-1]
                board = Board(cells[2].text, s)
                section.board_list.append(board)
            ret.add(section)
        return ret

    def fetch_face_list(self):
        html = self._do_action('editor/face.htm', {'ptext': 'text'})
        soup = BeautifulSoup(html)
        items = soup.findAll('img')
        ret = {}
        for i in items:
            ret[i['title']] = i['src']
        # cache the mapping to disk, then hand it back like the other
        # fetch_* methods
        with open('FaceList.json', 'w') as f:
            json.dump(ret, f)
        return ret