Example #1
0
    def wrapper(self, endpoint, auto_retry=False, **kwargs):
        '''Call the wrapped request function f and retry on transient failures.

        Retry handling:
          * connection errors: up to 5 consecutive immediate retries, then
            the ConnectionError is re-raised
          * 429 responses: retried (no retry limit) after waiting for the
            number of seconds given in the 'retry-after' header, at least 1s
          * 500/502 responses: only if auto_retry is set; retried with
            exponential back-off (1s, 2s, 4s, ...) as long as the back-off
            is below 600s and the error message is not known to be permanent

        :param endpoint: passed through to the wrapped function
        :param auto_retry: retry 500 and 502 responses with back-off
        :param kwargs: passed through to the wrapped function after
            self.add_auth_to_headers() has processed them
        :return: the last response returned by the wrapped function
        '''
        self.add_auth_to_headers(kwargs)
        back_off = 1
        retries = 0
        while True:
            try:
                response = f(self, endpoint, **kwargs)
            except requests.exceptions.ConnectionError:
                if retries >= 5:
                    raise
                log.warning('Connection error encountered. Retry')
                retries += 1
                continue
            # a request went through: start counting connection errors anew
            retries = 0

            dump_response(response)
            if response.status_code == 429:
                # Honor the wait time requested by the server, but wait at
                # least one second. (min() here would cap the wait at 1s and
                # could pass zero or negative values to time.sleep().)
                try:
                    retry_after = max(int(response.headers['retry-after']), 1)
                except (KeyError, TypeError, ValueError):
                    # header missing or not an integer number of seconds
                    retry_after = 1
                log.warning('429 encountered. Retry after {} seconds'.format(
                    retry_after))
                time.sleep(retry_after)
                continue

            if (response.status_code in [500, 502
                                         ]) and auto_retry and back_off < 600:
                # if we get a 500 b/c a message can not be decrypted then a retry will not help
                try:
                    message = response.json()
                except ValueError:
                    # JSON decode errors are ValueError subclasses; body is not JSON
                    message = {}
                if not isinstance(message, dict):
                    # valid JSON, but not an object we can look up 'message' in
                    message = {}
                if message.get('message', '') in [
                        'Unable to parse encrypted message',
                        'Unable to decrypt content name.',
                        'Unable to decrypt message',
                        'DefaultActivityEncryptionKeyUrl not found.'
                ]:
                    # retry does not help
                    break
                log.warning(
                    '\'{}\' encountered. Message {}. Retry, waiting for {} second(s)'
                    .format(response.reason, message, back_off))
                time.sleep(back_off)
                back_off = back_off * 2
                continue
            # nothing to retry: hand the response to the caller
            break
        return response
Example #2
0
 def refresh_token_to_access_token(self, refresh_token, client_info):
     ''' Use a refresh token to obtain a new access token.
     A 'refresh_token' grant is POSTed to the 'access_token' endpoint. A 401 reply is retried; at most 5 requests are sent.
     Return:
          Struct built from the JSON body of the reply
     Raises IbError if the final reply has a status code other than 200
     '''
     token_url = self.endpoint('access_token')
     form = Struct()
     form.grant_type = 'refresh_token'
     form.refresh_token = refresh_token.token
     form.client_id = client_info['id']
     form.client_secret = client_info['secret']

     # a 401 sometimes is only a temporary glitch, so give it a few tries
     attempt = 0
     while attempt < 5:
         attempt += 1
         response = self.session.post(token_url, data=form.get_dict())
         dump_response(response)
         if response.status_code == 401:
             log.warning('Got 401 on token refresh. Retrying...')
             continue
         break

     if response.status_code == 200:
         return Struct(response.json())
     raise IbError('Unexpected status code on GET(12): {} {}'.format(response.status_code, response.reason), response.status_code, response.reason, response.text)
Example #3
0
 def auth_code_to_token(self, client_info, code):
     ''' Exchange an authorization code for an OAuth token.
     An 'authorization_code' grant is POSTed to the 'access_token' endpoint.
     Return:
          Struct built from the JSON body of the reply
     Raises IbError if the reply has a status code other than 200
     '''
     token_url = self.endpoint('access_token')
     form = Struct()
     form.grant_type = 'authorization_code'
     form.redirect_uri = client_info['redirect_uri']
     form.code = code
     form.client_id = client_info['id']
     form.client_secret = client_info['secret']

     log.debug('Exchanging code for access token. POST to {}'.format(token_url))
     reply = self.session.post(token_url, data=form.get_dict())
     dump_response(reply)
     if reply.status_code == 200:
         return Struct(reply.json())
     raise IbError('Unexpected status code on POST(12): {} {}'.format(reply.status_code, reply.reason))
Example #4
0
    def auth_code_to_token(self, client_info, code):
        ''' Trade the authorization code of a grant flow for an OAuth token.

        The request is a POST of an 'authorization_code' grant to the
        'access_token' endpoint.
        Return:
             Struct created from the JSON body of the reply
        Raises IbError for any status code other than 200
        '''
        endpoint_url = self.endpoint('access_token')

        payload = Struct()
        payload.grant_type = 'authorization_code'
        payload.redirect_uri = client_info['redirect_uri']
        payload.code = code
        payload.client_id = client_info['id']
        payload.client_secret = client_info['secret']

        log.debug(
            'Exchanging code for access token. POST to {}'.format(endpoint_url))
        result = self.session.post(endpoint_url, data=payload.get_dict())
        dump_response(result)

        status = result.status_code
        if status != 200:
            error_text = 'Unexpected status code on POST(12): {} {}'.format(
                status, result.reason)
            raise IbError(error_text)

        return Struct(result.json())
Example #5
0
 def _follow_redirects(self, response, intercept_url=''):
     ''' Follow redirects (status 302) of response. If intercept_url is given the redirect chain stops at the 1st url which starts with the given value
     and has a query string.
     Return:
          if intercept_url is given then we return a dictionary of values (parse_qs format: each value is a list) in the intercepted URL or None if no intercept happened
          if no intercept_url is given then we return the last response.
     Raises FlowError if the query string of a redirect target has an 'error' parameter
     '''
     while response.status_code == 302:
         dump_response(response)
         # determine redirection target; strip surrounding whitespace from the header value
         location = response.headers['location'].strip()
         loc_url = urllib.parse.urlparse(location)
         if loc_url.query:
             query = urllib.parse.parse_qs(loc_url.query)
             if 'error' in query:
                 # error_description is optional in an OAuth error response:
                 # don't hide the actual error behind a KeyError if it is missing
                 description = query.get('error_description', [''])[0]
                 raise FlowError('OAuth error(11): {}, {}'.format(query['error'][0], description))
             if intercept_url and location.startswith(intercept_url):
                 return query
         # follow redirection; a relative location is resolved against the URL of the request
         location = urllib.parse.urljoin(response.request.url, location)
         response = self.session.get(location, allow_redirects=False)
     return None if intercept_url else response
Example #6
0
    def refresh_token_to_access_token(self, refresh_token, client_info):
        ''' Get a fresh access token for the given refresh token.

        POSTs a 'refresh_token' grant to the 'access_token' endpoint. As
        long as the reply is a 401 the request is repeated, up to a total
        of 5 requests.
        Return:
             Struct created from the JSON body of the reply
        Raises IbError if the last reply has a status code other than 200
        '''
        token_endpoint = self.endpoint('access_token')

        grant = Struct()
        grant.grant_type = 'refresh_token'
        grant.refresh_token = refresh_token.token
        grant.client_id = client_info['id']
        grant.client_secret = client_info['secret']

        # 401s can be temporary glitches which go away when trying again
        remaining = 5
        while remaining:
            remaining -= 1
            response = self.session.post(token_endpoint,
                                         data=grant.get_dict())
            dump_response(response)
            if response.status_code != 401:
                break
            log.warning('Got 401 on token refresh. Retrying...')

        if response.status_code != 200:
            description = 'Unexpected status code on GET(12): {} {}'.format(
                response.status_code, response.reason)
            raise IbError(description, response.status_code, response.reason,
                          response.text)
        return Struct(response.json())
Example #7
0
 def _follow_redirects(self, response, intercept_url=''):
     ''' Follow redirects (status 302) of response. If intercept_url is given the redirect chain stops at the 1st url which starts with the given value
     and has a query string.
     Return:
          if intercept_url is given then we return a dictionary of values (parse_qs format: each value is a list) in the intercepted URL or None if no intercept happened
          if no intercept_url is given then we return the last response.
     Raises FlowError if the query string of a redirect target has an 'error' parameter
     '''
     while response.status_code == 302:
         dump_response(response)
         # determine redirection target; strip surrounding whitespace from the header value
         location = response.headers['location'].strip()
         loc_url = urllib.parse.urlparse(location)
         if loc_url.query:
             query = urllib.parse.parse_qs(loc_url.query)
             if 'error' in query:
                 # error_description is optional in an OAuth error response:
                 # fall back to an empty description instead of raising KeyError
                 error_description = query.get('error_description', [''])[0]
                 raise FlowError('OAuth error(11): {}, {}'.format(
                     query['error'][0], error_description))
             if intercept_url and location.startswith(intercept_url):
                 return query
         # follow redirection; a relative location is resolved against the URL of the request
         location = urllib.parse.urljoin(response.request.url, location)
         response = self.session.get(location, allow_redirects=False)
     return None if intercept_url else response
Example #8
0
 def _cisco_sso_user_auth(self, response, user_id, user_password):
     ''' Execute the full web browser flow for a cisco.com SSO enabled user.
     Args:
          response: response with the hidden form (SAMLRequest) which starts the SSO flow
          user_id: value for the 1st input field of the credentials form
          user_password: value for the 2nd input field of the credentials form
     Return:
          the response after the authentication flow. The response typically will be the form asking for authorization for the client
     Raises:
          FlowError: an expected form, input field or the resume path is missing, a request returned an unexpected status code,
               or the page after posting the credentials has a warning message
          requests.exceptions.ConnectionError: posting the credentials or resuming the flow failed 5 times in a row
     '''
     # Form based authentication for cisco.com SSO enabled user

     # this gets us a hidden form which we need to submit
     soup = bs4.BeautifulSoup(response.text, 'lxml')
     form = soup.find('form')
     if not form: raise FlowError('No form found(2)')

     # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
     form_action = urllib.parse.urljoin(response.request.url, form.get('action', urllib.parse.urlparse(response.request.url).path))

     # There should be a few input fields carrying RelayState and SAMLRequest
     inputs = form.find_all('input')
     if not inputs: raise FlowError('No input fields found(3)')

     # compile the form data
     form_data = {inp['name'] : inp['value'] for inp in inputs if inp['type'] != 'submit'}

     # Try to post the form
     log.debug('auth code grant flow (Cisco SSO, {}): submit hidden form with SAMLRequest to {}'.format(user_id, form_action))
     response = self.session.post(form_action, data = form_data)
     dump_response(response)
     if response.status_code != 200: raise FlowError('Unexpected status code on POST(4): {} {}'.format(response.status_code, response.reason))

     # this gets us to a page where the CEC credentials need to be entered
     # Now we should be at the point where we use form based authentication
     soup = bs4.BeautifulSoup(response.text, 'lxml')
     form = soup.find('form')
     if not form: raise FlowError('No form found(5)')

     # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
     form_action = urllib.parse.urljoin(response.request.url, form.get('action', urllib.parse.urlparse(response.url).path))

     inputs = form.find_all('input')
     if not inputs: raise FlowError('No input fields found(6)')

     # compile the form data
     # we assume that the 1st two fields are user and password
     form_data = {inp['name'] : inp['value'] for inp in inputs[2:] if inp['type'] != 'submit'}
     form_data[inputs[0]['name']] = user_id
     form_data[inputs[1]['name']] = user_password

     # Try to post the form, w/o redirects; location has trailing spaces which the requests module does not strip
     log.debug('auth code grant flow (Cisco SSO, {}): Posting credentials to {}'.format(user_id, form_action))
     # Up to 5 attempts. The last connection error is re-raised: silently giving up
     # would continue the flow with the stale response of the previous request.
     retries = 0
     while True:
         try:
             response = self.session.post(form_action, data = form_data, allow_redirects=False)
         except requests.exceptions.ConnectionError:
             retries += 1
             if retries >= 5: raise
             time.sleep(1)
             continue
         break
     response = self._follow_redirects(response)
     dump_response(response)

     if response.status_code != 200: raise FlowError('Unexpected status code on POST(7): {} {}'.format(response.status_code, response.reason))

     # let's check for an error message
     soup = bs4.BeautifulSoup(response.text, 'lxml')
     warn_msg = soup.find(id='warning-msg')
     if warn_msg:
         raise FlowError('Authentication problem: \n{}'.format(warn_msg.text.strip()))

     # this gets us to a page with some JavaScript code which resumes somewhere
     q = urllib.parse.parse_qs(urllib.parse.urlparse(response.url).query, keep_blank_values=True)
     if 'resumePath' not in q:
         raise FlowError ('Could not find resume path in query string: {}'.format(response.url))
     resume_url = 'https://cloudsso.cisco.com' + q['resumePath'][0]

     log.debug('auth code grant flow (Cisco SSO, {}): Resume flow. Get on {}'.format(user_id, resume_url))
     retries = 0
     while True:
         try:
             response = self.session.get(resume_url)
         except requests.exceptions.ConnectionError:
             retries += 1
             if retries >= 5: raise
             time.sleep(1)
             continue
         break
     dump_response(response)
     if response.status_code != 200: raise FlowError('Unexpected status code on GET(8): {} {}'.format(response.status_code, response.reason))

     # this returns a page with <body onload="javascript:document.forms[0].submit()">
     # So we again need to look at the embedded form
     soup = bs4.BeautifulSoup(response.text, 'lxml')
     form = soup.find('form')
     if not form: raise FlowError('No form found(9)')

     # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
     form_action = urllib.parse.urljoin(response.url, form.get('action', urllib.parse.urlparse(response.url).path))

     # There should be a few input fields carrying RelayState and SAMLResponse
     inputs = form.find_all('input')
     if not inputs: raise FlowError('No input fields found(10)')

     # compile the form data
     form_data = {inp['name'] : inp['value'] for inp in inputs if inp['type'] != 'submit'}

     if log.isEnabledFor(logging.DEBUG):
         # take a look at the SAMLResponse: log the non-blank lines of the pretty printed XML
         saml_response = form_data['SAMLResponse']
         for l in (s for s in xml.dom.minidom.parseString(base64.b64decode(saml_response)).toprettyxml().splitlines() if s.strip()):
             log.debug('SAML Response: {}'.format(l))

     # post the form, but w/o automatic redirects, b/c we want to be able to intercept errors
     log.debug('auth code grant flow (Cisco SSO, {}): Submit hidden form to {}'.format(user_id, form_action))

     response = self.session.post(form_action, data = form_data, allow_redirects=False)
     response = self._follow_redirects(response)
     if response.status_code != 200: raise FlowError('Unexpected status code on GET(12): {} {}'.format(response.status_code, response.reason))
     return response
Example #9
0
 def auth_code_grant_flow(self, user_info, client_info, scope = 'webexsquare:admin'):
     ''' Executes an OAuth Authorization Code Grant Flow
     Args:
          user_info: dictionary; keys 'email', 'id' and 'password' are used
          client_info: dictionary; keys 'id', 'redirect_uri' and 'secret' are required ('secret' is only checked here)
          scope: scope to request in the authorization request
     Returns an Authorization code
     Raises FlowError if a page, form or parameter expected in the flow is missing, if a request returns an unexpected
     status code, or if the state returned with the authorization code does not match the state sent
     '''
     # NOTE: asserts are skipped when Python runs with -O
     assert user_info['email']
     assert user_info['id']
     assert user_info['password']
     assert client_info['id']
     assert client_info['redirect_uri']
     assert client_info['secret']

     # we try to use the Authorization Code Grant Flow
     endpoint = self.endpoint('authorize')

     # random state
     flow_state = str(uuid.uuid4())
     data = Struct()
     data.response_type = 'code'
     data.state = flow_state
     data.client_id = client_info['id']
     data.redirect_uri = client_info['redirect_uri']
     data.scope = scope
     log.debug('auth code grant flow: access endpoint {}'.format(endpoint))

     response = self.session.get(endpoint, params=data.get_dict())
     dump_response(response)
     if response.status_code != 200: raise FlowError('Unexpected status code on GET(1): {} {}'.format(response.status_code, response.reason))

     # after a number of redirects this gets us to a page on which we need to enter an email address
     # The title is "Sign In - Cisco WebEx"
     # if we still have a valid session cookie we might actually get to the OAuth2 authorization page directly
     soup = bs4.BeautifulSoup(response.text, 'lxml')
     title = soup.find('title')
     page_title = title.text.strip() if title else None

     if page_title not in ['Sign In - Cisco WebEx', 'OAuth2 Authorization - Cisco WebEx']:
         raise FlowError('Didn\'t find expected title')

     if page_title == 'Sign In - Cisco WebEx':
         # Need to sign in.

         log.debug('auth code grant flow: found expected \'Sign In - Cisco WebEx\'')
         # This form is part of the reply:
         #     <form name="GlobalEmailLookup" id="GlobalEmailLookupForm" method="post" action="/idb/globalLogin">
         #         <input type="hidden" id="email" name="email" value=""></input>
         #         <input type="hidden" id="isCookie" name="isCookie" value="false"></input>
         #         <input type="hidden" name="gotoUrl" value="aHR0cHM6Ly9pZGJyb2tlci53ZWJleC5jb20vaWRiL29hdXRoMi92MS9hdXRob3JpemU/c2NvcGU9c3BhcmslM0FwZW9wbGVfcmVhZCtzcGFyayUzQXJvb21zX3JlYWQrc3BhcmslM0FtZW1iZXJzaGlwc19yZWFkK3NwYXJrJTNBbWVzc2FnZXNfcmVhZCZjbGllbnRfaWQ9Q2U2N2Y5NzE0YTEzN2U2ODg0OGJhNjQ1YzQ4NjBmYThhZWUyYzUwMzFlZTA1YmMyMjE2MzNkMGNlZWRlOWExYjkmcmVkaXJlY3RfdXJpPWh0dHBzJTNBJTJGJTJGb2F1dGgua3JvaG5zLmRlJTJGb2F1dGgyJnN0YXRlPXNvbWVSYW5kb21TdHJpbmcmcmVzcG9uc2VfdHlwZT1jb2Rl" />
         #         <input type="hidden" id="encodedParamsString" name="encodedParamsString" value="dHlwZT1sb2dpbg==" />
         #     </form>
         # A POST with the email address to that form is the next step
         # (the page has already been parsed into soup above)
         form = soup.find(id = 'GlobalEmailLookupForm')
         if not form: raise FlowError('Couldn\'t find form \'GlobalEmailLookupForm\' to post user\'s email address')

         inputs = form.find_all('input')
         if not inputs: raise FlowError('No input fields found in form \'GlobalEmailLookupForm\'')
         # 1st input is the email address
         inputs[0]['value'] = user_info['email']
         form_data = {i['name'] : i['value'] for i in inputs}
         form_action = urllib.parse.urljoin(response.request.url, form.get('action', urllib.parse.urlparse(response.request.url).path))
         log.debug('auth code grant flow: Posting email address {} to form {}'.format(user_info['email'], form_action))
         response = self.session.post(form_action, data = form_data)
         dump_response(response)

         # For CIS users this redirects us to a page with title "Sign In - Cisco WebEx"
         log.debug('auth code grant flow: Checking for title \'Sign In - Cisco WebEx\'')
         soup = bs4.BeautifulSoup(response.text, 'lxml')
         title = soup.find('title')
         if title and title.text.strip() == 'Sign In - Cisco WebEx':
             # Identified the form to directly enter credentials
             dump_response(response)
             # search for the form with name 'Login'
             form = soup.find(lambda tag : tag.name == 'form' and tag.get('name', '') == 'Login')
             # fail with a FlowError like everywhere else in the flow instead of an AttributeError on None
             if not form: raise FlowError('Couldn\'t find form \'Login\' to post user\'s credentials')
             inputs = form.find_all('input')
             form_data = {i['name'] : i['value'] for i in inputs}
             form_data['IDToken0'] = ''
             form_data['IDToken1'] = user_info['email']
             form_data['IDToken2'] = user_info['password']
             form_data['IDButton'] = 'Sign In'
             form_action = urllib.parse.urljoin(response.request.url, form.get('action', urllib.parse.urlparse(response.request.url).path))
             log.debug('auth code grant flow: Found title \'Sign In - Cisco WebEx\'. Posting credentials to {}'.format(form_action))
             response = self.session.post(form_action, data = form_data)
             dump_response(response)
         else:
             # authentication of a cisco.com SSO enabled user requires multiple steps (SAML 2.0 REDIRECT/POST flow with some javascript ...
             response = self._cisco_sso_user_auth(response, user_info['id'], user_info['password'])

     # this now is a form where we are requested to grant the requested access
     soup = bs4.BeautifulSoup(response.text, 'lxml')
     form = soup.find('form')
     if not form: raise FlowError('No form found(13)')

     # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
     form_action = urllib.parse.urljoin(response.url, form.get('action', urllib.parse.urlparse(response.url).path))

     inputs = form.find_all('input')
     if not inputs: raise FlowError('No input fields found(14)')

     # compile the form data
     # the form basically has few hidden fields and the "decision" field needs to be set to "accept"
     form_data = {inp['name'] : inp['value'] for inp in inputs if inp['type'] == 'hidden'}
     form_data['decision'] = 'accept'

     # Again post, but no automatic redirects
     log.debug('auth code grant flow: Granting access to client by posting \'accept\' decision')
     response = self.session.post(form_action, data = form_data, allow_redirects=False)

     # follow redirects, but stop at client redirect URI; this allows to use non-existing redirect URIs
     # with an intercept URL the result is the parsed query (dictionary of lists) of the intercepted URL or None
     query = self._follow_redirects(response, client_info['redirect_uri'])
     if not query: raise FlowError('Failed to get OAuth authorization code')
     # a redirect w/o state or code is a failed flow; don't let it surface as a KeyError
     returned_state = query.get('state', [None])[0]
     if returned_state != flow_state: raise FlowError('State has been tampered with?!. Got ({}), expected ({})'.format(returned_state, flow_state))
     if 'code' not in query: raise FlowError('Failed to get OAuth authorization code')
     return query['code'][0]
Example #10
0
    def _cisco_sso_user_auth(self, response, user_id, user_password):
        ''' Execute the full web browser flow for a cisco.com SSO enabled user.
        Args:
             response: response with the hidden form (SAMLRequest) which starts the SSO flow
             user_id: value for the 1st input field of the credentials form
             user_password: value for the 2nd input field of the credentials form
        Return:
             the response after the authentication flow. The response typically will be the form asking for authorization for the client
        Raises:
             FlowError: an expected form, input field or the resume path is missing, a request returned an unexpected status code,
                  or the page after posting the credentials has a warning message
             requests.exceptions.ConnectionError: posting the credentials or resuming the flow failed 5 times in a row
        '''
        # Form based authentication for cisco.com SSO enabled user

        # this gets us a hidden form which we need to submit
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        form = soup.find('form')
        if not form: raise FlowError('No form found(2)')

        # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
        form_action = urllib.parse.urljoin(
            response.request.url,
            form.get('action',
                     urllib.parse.urlparse(response.request.url).path))

        # There should be a few input fields carrying RelayState and SAMLRequest
        inputs = form.find_all('input')
        if not inputs: raise FlowError('No input fields found(3)')

        # compile the form data
        form_data = {
            inp['name']: inp['value']
            for inp in inputs if inp['type'] != 'submit'
        }

        # Try to post the form
        log.debug(
            'auth code grant flow (Cisco SSO, {}): submit hidden form with SAMLRequest to {}'
            .format(user_id, form_action))
        response = self.session.post(form_action, data=form_data)
        dump_response(response)
        if response.status_code != 200:
            raise FlowError('Unexpected status code on POST(4): {} {}'.format(
                response.status_code, response.reason))

        # this gets us to a page where the CEC credentials need to be entered
        # Now we should be at the point where we use form based authentication
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        form = soup.find('form')
        if not form: raise FlowError('No form found(5)')

        # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
        form_action = urllib.parse.urljoin(
            response.request.url,
            form.get('action',
                     urllib.parse.urlparse(response.url).path))

        inputs = form.find_all('input')
        if not inputs: raise FlowError('No input fields found(6)')

        # compile the form data
        # we assume that the 1st two fields are user and password
        form_data = {
            inp['name']: inp['value']
            for inp in inputs[2:] if inp['type'] != 'submit'
        }
        form_data[inputs[0]['name']] = user_id
        form_data[inputs[1]['name']] = user_password

        # Try to post the form, w/o redirects; location has trailing spaces which the requests module does not strip
        log.debug(
            'auth code grant flow (Cisco SSO, {}): Posting credentials to {}'.
            format(user_id, form_action))
        # Up to 5 attempts. The last connection error is re-raised: silently
        # giving up would continue the flow with the stale response of the
        # previous request.
        retries = 0
        while True:
            try:
                response = self.session.post(form_action,
                                             data=form_data,
                                             allow_redirects=False)
            except requests.exceptions.ConnectionError:
                retries += 1
                if retries >= 5: raise
                time.sleep(1)
                continue
            break
        response = self._follow_redirects(response)
        dump_response(response)

        if response.status_code != 200:
            raise FlowError('Unexpected status code on POST(7): {} {}'.format(
                response.status_code, response.reason))

        # let's check for an error message
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        warn_msg = soup.find(id='warning-msg')
        if warn_msg:
            raise FlowError('Authentication problem: \n{}'.format(
                warn_msg.text.strip()))

        # this gets us to a page with some JavaScript code which resumes somewhere
        q = urllib.parse.parse_qs(urllib.parse.urlparse(response.url).query,
                                  keep_blank_values=True)
        if 'resumePath' not in q:
            raise FlowError(
                'Could not find resume path in query string: {}'.format(
                    response.url))
        resume_url = 'https://cloudsso.cisco.com' + q['resumePath'][0]

        log.debug(
            'auth code grant flow (Cisco SSO, {}): Resume flow. Get on {}'.
            format(user_id, resume_url))
        retries = 0
        while True:
            try:
                response = self.session.get(resume_url)
            except requests.exceptions.ConnectionError:
                retries += 1
                if retries >= 5: raise
                time.sleep(1)
                continue
            break
        dump_response(response)
        if response.status_code != 200:
            raise FlowError('Unexpected status code on GET(8): {} {}'.format(
                response.status_code, response.reason))

        # this returns a page with <body onload="javascript:document.forms[0].submit()">
        # So we again need to look at the embedded form
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        form = soup.find('form')
        if not form: raise FlowError('No form found(9)')

        # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
        form_action = urllib.parse.urljoin(
            response.url,
            form.get('action',
                     urllib.parse.urlparse(response.url).path))

        # There should be a few input fields carrying RelayState and SAMLResponse
        inputs = form.find_all('input')
        if not inputs: raise FlowError('No input fields found(10)')

        # compile the form data
        form_data = {
            inp['name']: inp['value']
            for inp in inputs if inp['type'] != 'submit'
        }

        if log.isEnabledFor(logging.DEBUG):
            # take a look at the SAMLResponse: log the non-blank lines of the pretty printed XML
            saml_response = form_data['SAMLResponse']
            for l in (s for s in xml.dom.minidom.parseString(
                    base64.b64decode(
                        saml_response)).toprettyxml().splitlines()
                      if s.strip()):
                log.debug('SAML Response: {}'.format(l))

        # post the form, but w/o automatic redirects, b/c we want to be able to intercept errors
        log.debug(
            'auth code grant flow (Cisco SSO, {}): Submit hidden form to {}'.
            format(user_id, form_action))

        response = self.session.post(form_action,
                                     data=form_data,
                                     allow_redirects=False)
        response = self._follow_redirects(response)
        if response.status_code != 200:
            raise FlowError('Unexpected status code on GET(12): {} {}'.format(
                response.status_code, response.reason))
        return response
Example #11
0
    def auth_code_grant_flow(self,
                             user_info,
                             client_info,
                             scope='webexsquare:admin'):
        ''' Executes an OAuth Authorization Code Grant Flow
        Returns an Authorizatioon code
        '''
        assert user_info['email']
        assert user_info['id']
        assert user_info['password']
        assert client_info['id']
        assert client_info['redirect_uri']
        assert client_info['secret']

        # we try to use the Authorization Code Grant Flow
        endpoint = self.endpoint('authorize')

        # random state
        flow_state = str(uuid.uuid4())
        data = Struct()
        data.response_type = 'code'
        data.state = flow_state
        data.client_id = client_info['id']
        data.redirect_uri = client_info['redirect_uri']
        data.scope = scope
        log.debug('auth code grant flow: access endpoint {}'.format(endpoint))

        response = self.session.get(endpoint, params=data.get_dict())
        dump_response(response)
        if response.status_code != 200:
            raise FlowError('Unexpected status code on GET(1): {} {}'.format(
                response.status_code, response.reason))

        # after a number of redirects this gets us to a page on which we need to enter an email address
        # The title is "Sign In - Cisco WebEx"
        # if we still have a valid session cookie we might actually get to the OAuth2 authorization page directly
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        title = soup.find('title')

        if not (title and title.text.strip() in [
                'Sign In - Cisco WebEx', 'OAuth2 Authorization - Cisco WebEx'
        ]):
            raise FlowError('Didn\'t find expected title')

        if title and title.text.strip() == 'Sign In - Cisco WebEx':
            # Need to sign in.

            log.debug(
                'auth code grant flow: found expected \'Sign In - Cisco WebEx\''
            )
            '''
            This form is part of the reply:
                <form name="GlobalEmailLookup" id="GlobalEmailLookupForm" method="post" action="/idb/globalLogin">
                    <input type="hidden" id="email" name="email" value=""></input>
                    <input type="hidden" id="isCookie" name="isCookie" value="false"></input>
                    <input type="hidden" name="gotoUrl" value="aHR0cHM6Ly9pZGJyb2tlci53ZWJleC5jb20vaWRiL29hdXRoMi92MS9hdXRob3JpemU/c2NvcGU9c3BhcmslM0FwZW9wbGVfcmVhZCtzcGFyayUzQXJvb21zX3JlYWQrc3BhcmslM0FtZW1iZXJzaGlwc19yZWFkK3NwYXJrJTNBbWVzc2FnZXNfcmVhZCZjbGllbnRfaWQ9Q2U2N2Y5NzE0YTEzN2U2ODg0OGJhNjQ1YzQ4NjBmYThhZWUyYzUwMzFlZTA1YmMyMjE2MzNkMGNlZWRlOWExYjkmcmVkaXJlY3RfdXJpPWh0dHBzJTNBJTJGJTJGb2F1dGgua3JvaG5zLmRlJTJGb2F1dGgyJnN0YXRlPXNvbWVSYW5kb21TdHJpbmcmcmVzcG9uc2VfdHlwZT1jb2Rl" />
                    <input type="hidden" id="encodedParamsString" name="encodedParamsString" value="dHlwZT1sb2dpbg==" />
                </form>
            A POST with the email address to that form is the next step
            '''
            soup = bs4.BeautifulSoup(response.text, 'lxml')
            form = soup.find(id='GlobalEmailLookupForm')
            if not form:
                raise FlowError(
                    'Couldn\'t find form \'GlobalEmailLookupForm\' to post user\'s email address'
                )

            inputs = form.find_all('input')
            # 1st input is the email address
            inputs[0]['value'] = user_info['email']
            form_data = {i['name']: i['value'] for i in inputs}
            form_action = urllib.parse.urljoin(
                response.request.url,
                form.get('action',
                         urllib.parse.urlparse(response.request.url).path))
            log.debug(
                'auth code grant flow: Posting email address {} to form {}'.
                format(user_info['email'], form_action))
            response = self.session.post(form_action, data=form_data)
            dump_response(response)

            # For CIS users this redirects us to a page with title "Sign In - Cisco WebEx"
            log.debug(
                'auth code grant flow: Checking for title \'Sign In - Cisco WebEx\''
            )
            soup = bs4.BeautifulSoup(response.text, 'lxml')
            title = soup.find('title')
            if title and title.text.strip() == 'Sign In - Cisco WebEx':
                # Identified the form to directly enter credentials
                dump_response(response)
                # search for the form with name 'Login'
                form = soup.find(lambda tag: tag.name == 'form' and tag.get(
                    'name', '') == 'Login')
                inputs = form.find_all('input')
                form_data = {i['name']: i['value'] for i in inputs}
                form_data['IDToken0'] = ''
                form_data['IDToken1'] = user_info['email']
                form_data['IDToken2'] = user_info['password']
                form_data['IDButton'] = 'Sign In'
                form_action = urllib.parse.urljoin(
                    response.request.url,
                    form.get('action',
                             urllib.parse.urlparse(response.request.url).path))
                log.debug(
                    'auth code grant flow: Found title \'Sign In - Cisco WebEx\'. Posting credentials to {}'
                    .format(form_action))
                response = self.session.post(form_action, data=form_data)
                dump_response(response)
            else:
                # authentication of a cisco.com SSO enabled user requires multiple steps (SAML 2.0 REDIRECT/POST flow with some javascript ...
                response = self._cisco_sso_user_auth(response, user_info['id'],
                                                     user_info['password'])
            # if title and title.text.strip() == if title and title.text.strip() == 'Sign In - Cisco WebEx': .. else ..
        # if title and title.text.strip() == 'Sign In - Cisco WebEx':

        # this now is a form where we are requested to grant the requested access
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        form = soup.find('form')
        if not form: raise FlowError('No form found(13)')

        # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
        form_action = urllib.parse.urljoin(
            response.url,
            form.get('action',
                     urllib.parse.urlparse(response.url).path))

        inputs = form.find_all('input')
        if not inputs: raise FlowError('No input fields found(14)')

        # compile the form data
        # the form basically has few hidden fields and the "decision" field needs to be set to "accept"
        form_data = {
            inp['name']: inp['value']
            for inp in inputs if inp['type'] == 'hidden'
        }
        form_data['decision'] = 'accept'

        # Again post, but no automatic redirects
        log.debug(
            'auth code grant flow: Granting access to client by posting \'accept\' decision'
        )
        response = self.session.post(form_action,
                                     data=form_data,
                                     allow_redirects=False)

        # follow redirects, but stop at client redirect URI; this allows to use non-existing redirect URIs
        response = self._follow_redirects(response,
                                          client_info['redirect_uri'])
        if not response:
            raise FlowError('Failed to get OAuth authorization code')
        if response['state'][0] != flow_state:
            raise FlowError(
                'State has been tampered with?!. Got ({}), expected ({})'.
                format(response['state'][0], flow_state))
        return response['code'][0]
Example #12
0
def get_attachments():
    ''' Download all new attachments from all rooms to a local folder tree.

    Credentials and client information are read from 'spark.ini'. The base
    directory for the downloads is read from section 'path', key 'base' of an
    .ini file named like this script. Progress (last seen activity, folder name
    and downloaded attachments per room) is persisted in a .json file named
    like this script, so that subsequent runs only look at new messages.

    One folder is maintained per room below the base directory. The state file
    is written after every room which has been checked and also if an
    unexpected exception terminates the run (the exception is re-raised).
    '''

    class DownloadError(Exception):
        ''' raised if an attachment can not be downloaded
        '''

    def assert_folder(p_state, base_path, room_id, room_folder):
        ''' make sure that the folder is created for the room

        p_state: persistent state (dict, keyed by room id); updated in place
        base_path: directory below which all room folders are created
        room_id: id of the room
        room_folder: desired folder name for the room

        returns: the folder name actually used; this can differ from
            room_folder if the desired name is already taken by another room
        '''
        if not os.path.lexists(base_path):
            # base directory needs to be created (including missing parents)
            logging.debug('Base directory %s does not exist' % base_path)
            os.makedirs(base_path)

        full_path = os.path.join(base_path, room_folder)
        room_state = p_state.setdefault(room_id, {})

        if 'folder' not in room_state:
            logging.debug('No previous folder for room %s' % room_folder)
            # the folder for this room hasn't been created before
            i = 0
            base_folder = room_folder
            while True:
                full_path = os.path.join(base_path, room_folder)
                try:
                    os.mkdir(full_path)
                    logging.debug('Created folder %s' % full_path)
                except FileExistsError:
                    # Folder exists, but not for this room?
                    # Try to find the room the folder has been created for
                    logging.debug('Folder {} already exists'.format(full_path))
                    in_use = any(r.get('folder') == room_folder for r in p_state.values())
                    if in_use:
                        # we need to come up with a different folder name for the new folder
                        i = i + 1
                        room_folder = base_folder + str(i)
                        logging.debug('Room folder {} belongs to identified. Creating alternate name {} for new folder'.format(full_path, room_folder))
                        continue
                    # this folder seems to be stale: move it out of the way
                    logging.debug('Folder {} seems to belong to no room. Renaming to {}'.format(full_path, full_path + '.stale'))
                    os.rename(full_path, full_path + '.stale')
                    os.mkdir(full_path)
                    logging.debug('Created folder %s' % full_path)
                break

            # remember the folder name for this room
            room_state['folder'] = room_folder
        else:
            # has the folder name been changed?
            if room_folder != room_state['folder']:
                logging.debug('Room name (folder) for room %s changed from %s to %s' % (room_id, room_state['folder'], room_folder))
                old_full_path = os.path.join(base_path, room_state['folder'])
                logging.debug('Renaming %s to %s' % (old_full_path, full_path))
                try:
                    os.rename(old_full_path, full_path)
                except FileNotFoundError:
                    logging.warning('Tried to rename folder {} but the folder did not exist'.format(old_full_path))
                    if os.path.lexists(full_path):
                        logging.warning('New folder {} exists. Assuming this is the correct folder'.format(full_path))
                    else:
                        logging.warning('New folder also does not exist. Potentially lost state!?')
                room_state['folder'] = room_folder

            if not os.path.lexists(full_path):
                logging.debug('Folder %s does not exist and will be created' % full_path)
                os.mkdir(full_path)
        # we might have changed the folder name. So we return the potentially updated value
        return room_folder

    def copy_attachment(p_state, base_path, room_id, room_folder, message, attachment_index, file_name, response):
        ''' read the attachment to a file

        The body of response is streamed to a file in the room folder. The file
        name used is recorded in the message state under the attachment index.
        An attachment whose index is already recorded is skipped.
        File name clashes are resolved by adding a timestamp and index to the
        name of the attachment belonging to the older message.
        '''
        message_id = message['id']
        message_created = message['created']

        # remove whitespaces around base name and extension
        (base, ext) = os.path.splitext(file_name)
        file_name = base.strip() + ext.strip()

        full_path = os.path.join(base_path, room_folder)
        full_name = os.path.join(full_path, file_name)
        room_state = p_state[room_id]

        if 'messages' not in room_state:
            logging.debug('Initialize message state in room state')
            room_state['messages'] = {}
        messages_state = room_state['messages']

        if message_id not in messages_state:
            logging.debug('Initialize message state for message %s from %s' % (message_id, str_to_datetime(message_created).isoformat()))
            messages_state[message_id] = {'created': message_created}
        message_state = messages_state[message_id]

        # state is serialized as JSON: keys need to be strings
        attachment_index = str(attachment_index).strip()
        if attachment_index in message_state:
            logging.debug('Attachment already downloaded. Message %s from %s, index %s, file \'%s\' as \'%s\'' %
                          (message_id, message_created, attachment_index, file_name, message_state[attachment_index]))
            logging.info('      Already downloaded. Skipping file...')
            return

        logging.debug('New attachment. Message %s from %s, index %s, file \'%s\'' % (message_id, message_created, attachment_index, file_name))
        if os.path.exists(full_name):
            logging.debug('File \'%s\' already exists' % file_name)
            # Find the message and index which currently uses this name
            # The Mac OS X file system in case preserving but case insensitive so "attachment.png" and "Attachment.png" are the 'same'
            # we have to consider that when searching for the message which references to a given file name: the check needs to be case insensitive
            wanted = file_name.lower()
            owner = next(((ms, idx)
                          for ms in messages_state.values()
                          for idx in ms
                          if ms[idx].lower() == wanted), None)
            if owner is None:
                logging.warning('File \'%s\' exists, but message this attachment belongs to could not be found' % full_name)
                logging.warning('.. renaming to {}'.format(full_name + '.stale'))
                os.rename(full_name, full_name + '.stale')
            else:
                ms, idx = owner
                logging.debug('Existing file \'%s\' belongs to message from %s' % (ms[idx], str_to_datetime(ms['created']).isoformat()))
                if message_created > ms['created']:
                    # the older (existing) file needs to be renamed
                    logging.debug('This attachment seems to be newer. This: %s, existing: %s' %
                                  (str_to_datetime(message_created).isoformat(), str_to_datetime(ms['created']).isoformat()))
                    logging.debug('Existing file needs to be renamed')
                    (base, ext) = os.path.splitext(ms[idx])
                    # the new name is built from timestamp and index of the existing attachment
                    new_name = base + '_' + str_to_datetime(ms['created']).strftime('%Y%m%d%H%M%S') + '-' + str(idx).strip() + ext
                    logging.debug('File will be renamed to \'%s\'' % new_name)
                    os.rename(os.path.join(full_path, ms[idx]), os.path.join(full_path, new_name))
                    ms[idx] = new_name
                else:
                    logging.debug('This attachment seems to be older. This: %s, existing: %s' %
                                  (str_to_datetime(message_created).isoformat(), str_to_datetime(ms['created']).isoformat()))
                    logging.debug('This attachment needs to be saved under a different name')
                    (base, ext) = os.path.splitext(file_name)
                    file_name = base + '_' + str_to_datetime(message_created).strftime('%Y%m%d%H%M%S') + '-' + attachment_index + ext
                    full_name = os.path.join(base_path, room_folder, file_name)
                    logging.debug('Attachment will be saved as %s instead' % file_name)
        else:
            # the file does not exist. For sanity reasons remove all references to attachments with the same name from the message state
            # reason: user might have "cleaned up" the attachment repository on the file system and deleted a file
            wanted = file_name.lower()
            for ms in messages_state.values():
                # iterate over a copy of the keys: entries are deleted while looping
                for idx in list(ms):
                    if ms[idx].lower() == wanted:
                        logging.debug('Found stale message state for file %s from %s. Removing state..' % (ms[idx], str_to_datetime(ms['created']).isoformat()))
                        del ms[idx]

        # now finally copy the file
        logging.info('      Downloading attachment to \'%s\'' % full_name)
        with open(full_name, 'wb') as f:
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, f)
        # set access and last modified date to the creation time of the message
        f_time = str_to_datetime(message_created).timestamp()
        os.utime(full_name, (f_time, f_time))

        message_state[attachment_index] = file_name

    def check_new_activity(p_state, room):
        ''' check whether there is new activity in the room
        returns:
            None - no new activity
            '' - all messsages in the room are new
            <datestring> - date/time of last activity. Only newer activities need to be considered
        '''
        room_id = room['id']
        last_activity = room['lastActivity']

        if room_id not in p_state:
            logging.debug('New activity in room. Room never tested before')
            return ''

        last_seen = p_state[room_id].get('lastActivity', '')
        if last_activity != last_seen:
            logging.debug('New activity in room: last seen %s, now %s' % (last_seen, last_activity))
            return last_seen
        logging.debug('No new activity in room: last seen %s' % last_seen)
        return None

    def set_last_activity(p_state, room, activity):
        ''' sets 'lastActivity' for the given rooom in p_state
        '''
        logging.debug('Setting last activity for room to: {}'.format(activity))
        p_state.setdefault(room['id'], {})['lastActivity'] = activity

    setup_logging()

    spark_config = configparser.ConfigParser()
    spark_config.read('spark.ini')

    set_mask_password(spark_config['user']['password'])
    ib = SparkDevIdentityBroker()
    oauth_token = OAuthToken(ib, spark_config['user'], spark_config['client'])

    spark = spark_api.SparkAPI(oauth_token)

    def get_messages_with_attachments(room_id, last_activity):
        ''' get all messages with attachment of given room newer than last_activity
        '''
        # if we never read the room try to read messages in bigger chunks
        max_messages = 200 if not last_activity else 50
        for m in spark.list_messages(room_id, p_max=max_messages):
            if m['created'] <= last_activity:
                logging.debug('Got last message after last checked activity. Last activity %s, this message %s' % (str_to_datetime(last_activity).isoformat(), str_to_datetime(m['created']).isoformat()))
                break
            if 'files' in m:
                # only collect messages with attachments
                yield m

    def fetch_attachment(attachment_index, attachment, room_folder, message_created):
        ''' request the attachment; retry with exponential back-off on JSON error replies

        returns: tuple (response, content-disposition header value). The caller
            is responsible for closing the response.
        raises: DownloadError if the attachment can not be retrieved
        '''
        dump_logger = logging.getLogger('dump_utilities')
        back_off = 1
        while True:
            logging.debug('  Getting attachment {} from {}'.format(attachment_index, attachment))

            # we set the dump_utilities log level to INFO to avoid hick-ups from trying to log the content
            # the log level is set back to the original value even if the request fails
            level = dump_logger.getEffectiveLevel()
            dump_logger.setLevel(logging.INFO)
            try:
                response = spark.get(attachment, stream=True)
            finally:
                dump_logger.setLevel(level)
            dump_response(response, dump_body=False)

            cd_header = response.headers.get('content-disposition', None)
            if cd_header is not None:
                return response, cd_header

            # sometimes we don't get the attachment and instead a JSON error message is returned
            try:
                js = response.json()
                logging.error('Error downloading from room {}, time {}, error message: {}'.format(room_folder, message_created.isoformat(), js.get('message', 'Unknown problem: %s' % js)))
            except Exception:
                logging.error('Error downloading from room {}, time {}. No content-disposition header and no JSON found. Headers: {}'.format(room_folder, message_created.isoformat(), response.headers))
                # a retry makes no sense; release the connection before giving up
                response.close()
                raise DownloadError
            response.close()
            if back_off > 32:
                raise DownloadError
            logging.info('  Waiting for {} seconds before retrying...'.format(back_off))
            time.sleep(back_off)
            back_off = back_off * 2

    att_config = configparser.ConfigParser()
    att_config.read(os.path.splitext(__file__)[0] + '.ini')

    base_path = os.path.abspath(os.path.expanduser(att_config['path']['base']))

    state_file = os.path.splitext(__file__)[0] + '.json'
    try:
        f = open(state_file, 'r')
    except IOError:
        logging.debug('Did not find saved state in file %s' % state_file)
        p_state = {}
    else:
        # make sure that the file is closed even if the content can not be parsed
        with f:
            logging.debug('Reading saved state from file %s' % state_file)
            p_state = json.load(f)

    def save_state():
        ''' write the persistent state to the state file
        '''
        logging.debug('Saving state to file %s' % state_file)
        with open(state_file, 'w') as f:
            json.dump(p_state, f, indent=4)

    logging.info('Getting list of rooms...')
    try:
        rooms = list(spark.list_rooms())
    except spark_api.APIError as e:
        try:
            logging.error('Error getting rooms: %s' % e.args[2]['message'])
        except Exception:
            logging.error('Error getting rooms: %s' % e.args[2])
        rooms = []
    logging.info('Found {} rooms'.format(len(rooms)))

    try:
        for room in rooms:
            room_id = room['id']
            # in case the room doesn't have a title we use the room ID as fallback
            room_folder = valid_filename(room.get('title', room_id))

            logging.info('Checking room \'%s\'' % room_folder)
            logging.debug('ID: %s, %s' % (room_id, spark_api.base64_id_to_str(room_id)))
            last_activity = check_new_activity(p_state, room)
            if last_activity is None:
                logging.info('No new activity. Skipping room')
                continue

            try:
                # iterate through all messages with attachments
                for message in get_messages_with_attachments(room_id, last_activity):
                    message_created = str_to_datetime(message['created'])
                    logging.info('  %s: Message with %s attachments.' % (message_created.isoformat(), len(message['files'])))

                    for attachment_index, attachment in enumerate(message['files']):
                        try:
                            response, cd_header = fetch_attachment(attachment_index, attachment, room_folder, message_created)
                        except DownloadError:
                            # give up on the remaining attachments of this message
                            break

                        # the response is closed even if storing the attachment fails
                        try:
                            # NOTE(review): the cgi module is deprecated (PEP 594); confirm the targeted Python version
                            _, params = cgi.parse_header(cd_header)
                            file_name = params['filename']

                            size = response.headers.get('content-length', None)
                            size = 'n/a' if size is None else int(size)
                            logging.info('    File \'%s\', length: %s' % (file_name, size))

                            # copy the file to the appropriate folder
                            room_folder = assert_folder(p_state, base_path, room_id, room_folder)
                            copy_attachment(p_state, base_path, room_id, room_folder, message, attachment_index, file_name, response)
                        finally:
                            response.close()

                    # when done with a message set the last activity state for the current room
                    # NOTE(review): if list_messages yields the newest message first then an interrupted run
                    # would skip the remaining older messages on the next run - confirm the order
                    set_last_activity(p_state, room, message['created'])

                # when done with all message in the room set the last activity state for the current_room
                set_last_activity(p_state, room, room['lastActivity'])
            except spark_api.APIError as e:
                try:
                    logging.error('Error getting messages from room %s: %s' % (room_folder, e.info.get('message', 'unknown error')))
                except Exception:
                    logging.error('Error getting messages from room %s: %s' % (room_folder, e.info))

            save_state()
    except Exception:
        # don't lose the progress made so far
        save_state()
        raise

    # Setting the last modified date of the folders in line with the latest attachment in the room is a nice idea
    for room_state in p_state.values():
        if 'folder' not in room_state:
            continue
        folder = os.path.join(base_path, room_state['folder'])
        dates = [m['created'] for m in room_state.get('messages', {}).values()]
        if not dates:
            # folder without any recorded message: there is no timestamp to apply
            continue
        f_time = str_to_datetime(max(dates)).timestamp()
        try:
            os.utime(folder, (f_time, f_time))
        except Exception as e:
            logging.error('Error setting timestamp of folder {}:{}'.format(folder, e))
    return