def wrapper(self, endpoint, auto_retry=False, **kwargs):
    '''
    Issue a request through ``f`` and transparently retry on transient failures.

    ``f`` is a free variable taken from the enclosing scope (not visible
    here) -- presumably the request function being decorated; it is called
    as ``f(self, endpoint, **kwargs)``.

    Retry policy:
      * ``requests.exceptions.ConnectionError``: retried immediately, up to
        5 consecutive times, then re-raised.
      * HTTP 429: sleep for the number of seconds given in the
        ``retry-after`` header (at least 1 second, 1 second if the header is
        missing or not an integer) and retry without limit.
      * HTTP 500/502: only if ``auto_retry`` is set; exponential back-off
        starting at 1 second and doubling while the back-off is below 600
        seconds. Not retried if the body carries one of the known
        "cannot decrypt" messages.

    Parameters:
        endpoint: passed through to ``f`` unchanged.
        auto_retry: enable the retry on 500/502 responses.
        **kwargs: passed through to ``f`` after ``self.add_auth_to_headers``
            has been applied to them.

    Return:
        the last response obtained from ``f``.
    '''
    # 500 responses carrying one of these messages are permanent: retrying does not help
    no_retry_messages = (
        'Unable to parse encrypted message',
        'Unable to decrypt content name.',
        'Unable to decrypt message',
        'DefaultActivityEncryptionKeyUrl not found.',
    )

    self.add_auth_to_headers(kwargs)
    back_off = 1
    retries = 0
    while True:
        try:
            response = f(self, endpoint, **kwargs)
        except requests.exceptions.ConnectionError:
            if retries < 5:
                log.warning('Connection error encountered. Retry')
                retries = retries + 1
                continue
            raise
        # a successful round trip resets the connection error budget
        retries = 0
        dump_response(response)

        if response.status_code == 429:
            try:
                # honour the server's delay, but wait at least one second
                # (this used to be min(), which capped every wait at 1 second)
                retry_after = max(int(response.headers['retry-after']), 1)
            except (KeyError, TypeError, ValueError):
                # header missing or not an integer number of seconds
                retry_after = 1
            log.warning('429 encountered. Retry after {} seconds'.format(
                retry_after))
            time.sleep(retry_after)
            continue

        if (response.status_code in [500, 502]) and auto_retry and back_off < 600:
            # if we get a 500 b/c a message can not be decrypted then a retry will not help
            try:
                message = response.json()
            except JSONDecodeError:
                message = {}
            # the body might be valid JSON without being an object
            detail = message.get('message', '') if isinstance(message, dict) else ''
            if detail in no_retry_messages:
                # retry does not help
                break
            log.warning(
                '\'{}\' encountered. Message {}. Retry, waiting for {} second(s)'
                .format(response.reason, message, back_off))
            time.sleep(back_off)
            back_off = back_off * 2
            continue
        break
    return response
def refresh_token_to_access_token(self, refresh_token, client_info):
    '''
    Exchange a refresh token for a new access token.

    POSTs a ``refresh_token`` grant to the ``access_token`` endpoint. A 401
    reply is retried, for at most 5 attempts in total.

    Parameters:
        refresh_token: object whose ``token`` attribute is sent as the refresh token.
        client_info: mapping providing the ``'id'`` and ``'secret'`` of the client.

    Return:
        ``Struct`` built from the JSON body of the reply.

    Raises:
        IbError: if the status code of the last reply is not 200.
    '''
    token_url = self.endpoint('access_token')

    payload = Struct()
    payload.grant_type = 'refresh_token'
    payload.refresh_token = refresh_token.token
    payload.client_id = client_info['id']
    payload.client_secret = client_info['secret']

    # A 401 here sometimes is only a temporary glitch which goes away on retry
    for _attempt in range(5):
        reply = self.session.post(token_url, data=payload.get_dict())
        dump_response(reply)
        if reply.status_code == 401:
            log.warning('Got 401 on token refresh. Retrying...')
            continue
        break

    if reply.status_code == 200:
        return Struct(reply.json())

    failure = 'Unexpected status code on GET(12): {} {}'.format(
        reply.status_code, reply.reason)
    raise IbError(failure, reply.status_code, reply.reason, reply.text)
def auth_code_to_token(self, client_info, code):
    '''
    Exchange an OAuth authorization code for a token.

    POSTs an ``authorization_code`` grant to the ``access_token`` endpoint.

    Parameters:
        client_info: mapping providing ``'redirect_uri'``, ``'id'`` and ``'secret'`` of the client.
        code: the authorization code to exchange.

    Return:
        ``Struct`` built from the JSON body of the reply.

    Raises:
        IbError: if the reply status code is not 200.
    '''
    token_url = self.endpoint('access_token')

    payload = Struct()
    payload.grant_type = 'authorization_code'
    payload.redirect_uri = client_info['redirect_uri']
    payload.code = code
    payload.client_id = client_info['id']
    payload.client_secret = client_info['secret']

    log.debug('Exchanging code for access token. POST to {}'.format(token_url))
    reply = self.session.post(token_url, data=payload.get_dict())
    dump_response(reply)

    if reply.status_code == 200:
        return Struct(reply.json())

    raise IbError('Unexpected status code on POST(12): {} {}'.format(
        reply.status_code, reply.reason))
def auth_code_to_token(self, client_info, code):
    '''
    Exchange an OAuth authorization code for a token.

    POSTs an ``authorization_code`` grant to the ``access_token`` endpoint.

    Parameters:
        client_info: mapping providing ``'redirect_uri'``, ``'id'`` and ``'secret'`` of the client.
        code: the authorization code to exchange.

    Return:
        ``Struct`` built from the JSON body of the reply.

    Raises:
        IbError: if the reply status code is not 200.
    '''
    token_url = self.endpoint('access_token')

    payload = Struct()
    payload.grant_type = 'authorization_code'
    payload.redirect_uri = client_info['redirect_uri']
    payload.code = code
    payload.client_id = client_info['id']
    payload.client_secret = client_info['secret']

    log.debug('Exchanging code for access token. POST to {}'.format(token_url))
    reply = self.session.post(token_url, data=payload.get_dict())
    dump_response(reply)

    if reply.status_code == 200:
        return Struct(reply.json())

    raise IbError('Unexpected status code on POST(12): {} {}'.format(
        reply.status_code, reply.reason))
def _follow_redirects(self, response, intercept_url=''):
    '''
    Follow the chain of 302 redirects starting at response.

    If intercept_url is given the redirect chain stops at the 1st
    redirect target which starts with the given value.

    Parameters:
        response: the response to start from; only status code 302 is
            treated as a redirect.
        intercept_url: optional URL prefix at which to stop.

    Return:
        if intercept_url is given: the parsed query string (dictionary of
        lists, as returned by urllib.parse.parse_qs) of the intercepted
        URL -- an empty dictionary if that URL has no query string -- or
        None if no intercept happened.
        if no intercept_url is given: the last response.

    Raises:
        FlowError: if a redirect target carries an 'error' query parameter.
    '''
    while response.status_code == 302:
        dump_response(response)

        # determine redirection target
        location = response.headers['location'].strip()

        # Always parse the query of the current hop. parse_qs('') yields {},
        # so an intercepted URL without query string no longer hits an
        # unbound (or stale, from the previous hop) query variable.
        query = urllib.parse.parse_qs(urllib.parse.urlparse(location).query)
        if 'error' in query:
            # error_description is optional in an OAuth error redirect
            description = query.get('error_description', [''])[0]
            raise FlowError('OAuth error(11): {}, {}'.format(
                query['error'][0], description))

        if intercept_url and location.startswith(intercept_url):
            return query

        # follow redirection; the location may be relative to the request URL
        location = urllib.parse.urljoin(response.request.url, location)
        response = self.session.get(location, allow_redirects=False)
    return None if intercept_url else response
def refresh_token_to_access_token(self, refresh_token, client_info):
    '''
    Exchange a refresh token for a new access token.

    POSTs a ``refresh_token`` grant to the ``access_token`` endpoint. A 401
    reply is retried, for at most 5 attempts in total.

    Parameters:
        refresh_token: object whose ``token`` attribute is sent as the refresh token.
        client_info: mapping providing the ``'id'`` and ``'secret'`` of the client.

    Return:
        ``Struct`` built from the JSON body of the reply.

    Raises:
        IbError: if the status code of the last reply is not 200.
    '''
    token_url = self.endpoint('access_token')

    payload = Struct()
    payload.grant_type = 'refresh_token'
    payload.refresh_token = refresh_token.token
    payload.client_id = client_info['id']
    payload.client_secret = client_info['secret']

    # A 401 here sometimes is only a temporary glitch which goes away on retry
    for _attempt in range(5):
        reply = self.session.post(token_url, data=payload.get_dict())
        dump_response(reply)
        if reply.status_code == 401:
            log.warning('Got 401 on token refresh. Retrying...')
            continue
        break

    if reply.status_code == 200:
        return Struct(reply.json())

    failure = 'Unexpected status code on GET(12): {} {}'.format(
        reply.status_code, reply.reason)
    raise IbError(failure, reply.status_code, reply.reason, reply.text)
def _follow_redirects(self, response, intercept_url=''):
    '''
    Follow the chain of 302 redirects starting at response.

    If intercept_url is given the redirect chain stops at the 1st
    redirect target which starts with the given value.

    Parameters:
        response: the response to start from; only status code 302 is
            treated as a redirect.
        intercept_url: optional URL prefix at which to stop.

    Return:
        if intercept_url is given: the parsed query string (dictionary of
        lists, as returned by urllib.parse.parse_qs) of the intercepted
        URL -- an empty dictionary if that URL has no query string -- or
        None if no intercept happened.
        if no intercept_url is given: the last response.

    Raises:
        FlowError: if a redirect target carries an 'error' query parameter.
    '''
    while response.status_code == 302:
        dump_response(response)

        # determine redirection target
        location = response.headers['location'].strip()

        # Always parse the query of the current hop. parse_qs('') yields {},
        # so an intercepted URL without query string no longer hits an
        # unbound (or stale, from the previous hop) query variable.
        query = urllib.parse.parse_qs(urllib.parse.urlparse(location).query)
        if 'error' in query:
            # error_description is optional in an OAuth error redirect
            description = query.get('error_description', [''])[0]
            raise FlowError('OAuth error(11): {}, {}'.format(
                query['error'][0], description))

        if intercept_url and location.startswith(intercept_url):
            return query

        # follow redirection; the location may be relative to the request URL
        location = urllib.parse.urljoin(response.request.url, location)
        response = self.session.get(location, allow_redirects=False)
    return None if intercept_url else response
def _cisco_sso_user_auth(self, response, user_id, user_password):
    '''
    Execute the full web browser flow for a cisco.com SSO enabled user.

    Steps:
      1. submit the hidden form (SAMLRequest) found in response
      2. post the user credentials to the login form (connection errors are
         retried, redirects are followed manually)
      3. GET the resume URL taken from the 'resumePath' query parameter
      4. submit the hidden form (SAMLResponse) and follow the redirects

    Parameters:
        response: response carrying the initial hidden form.
        user_id: value for the 1st input field of the login form.
        user_password: value for the 2nd input field of the login form.

    Return:
        the response after the authentication flow. The response typically
        will be the form asking for authorization for the client.

    Raises:
        FlowError: on a missing form or input fields, an unexpected status
            code, an authentication warning on the page or a missing resume path.
        requests.exceptions.ConnectionError: if posting the credentials or
            getting the resume URL fails 5 times in a row.
    '''
    # Form based authentication for cisco.com SSO enabled user
    # this gets us a hidden form which we need to submit
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(2)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.request.url,
        form.get('action', urllib.parse.urlparse(response.request.url).path))

    # There should be a few input fields carrying RelayState and SAMLRequest
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(3)')

    # compile the form data
    form_data = {
        inp['name']: inp['value']
        for inp in inputs if inp['type'] != 'submit'
    }

    # Try to post the form
    log.debug(
        'auth code grant flow (Cisco SSO, {}): submit hidden form with SAMLRequest to {}'
        .format(user_id, form_action))
    response = self.session.post(form_action, data=form_data)
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on POST(4): {} {}'.format(
            response.status_code, response.reason))

    # this gets us to a page where the CEC credentials need to be entered
    # Now we should be at the point where we use form based authentication
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(5)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.request.url,
        form.get('action', urllib.parse.urlparse(response.url).path))
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(6)')
    if len(inputs) < 2:
        # we need one field for the user and one for the password
        raise FlowError('Expected user and password input fields(6)')

    # compile the form data
    # we assume that the 1st two fields are user and password
    form_data = {
        inp['name']: inp['value']
        for inp in inputs[2:] if inp['type'] != 'submit'
    }
    form_data[inputs[0]['name']] = user_id
    form_data[inputs[1]['name']] = user_password

    # Try to post the form, w/o redirects; location has trailing spaces which the requests module does not strip
    log.debug(
        'auth code grant flow (Cisco SSO, {}): Posting credentials to {}'.
        format(user_id, form_action))
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            response = self.session.post(form_action,
                                         data=form_data,
                                         allow_redirects=False)
        except requests.exceptions.ConnectionError:
            if attempt == max_attempts:
                # Give up. Silently continuing here would process the stale
                # login page as if the credentials had been posted.
                raise
            time.sleep(1)
            continue
        break
    response = self._follow_redirects(response)
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on POST(7): {} {}'.format(
            response.status_code, response.reason))

    # let's check for an error message
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    warn_msg = soup.find(id='warning-msg')
    if warn_msg:
        raise FlowError('Authentication problem: \n{}'.format(
            warn_msg.text.strip()))

    # this gets us to a page with some JavaScript code which resumes somewhere
    q = urllib.parse.parse_qs(urllib.parse.urlparse(response.url).query,
                              keep_blank_values=True)
    if 'resumePath' not in q:
        raise FlowError(
            'Could not find resume path in query string: {}'.format(
                response.url))
    resume_url = 'https://cloudsso.cisco.com' + q['resumePath'][0]
    log.debug(
        'auth code grant flow (Cisco SSO, {}): Resume flow. Get on {}'.
        format(user_id, resume_url))
    retries = 0
    while True:
        try:
            response = self.session.get(resume_url)
        except requests.exceptions.ConnectionError:
            retries += 1
            if retries >= 5:
                raise
            time.sleep(1)
            continue
        break
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on GET(8): {} {}'.format(
            response.status_code, response.reason))

    # this returns a page with <body onload="javascript:document.forms[0].submit()">
    # So we again need to look at the embedded form
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(9)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.url,
        form.get('action', urllib.parse.urlparse(response.url).path))

    # There should be a few input fields carrying RelayState and SAMLResponse
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(10)')

    # compile the form data
    form_data = {
        inp['name']: inp['value']
        for inp in inputs if inp['type'] != 'submit'
    }

    if log.isEnabledFor(logging.DEBUG):
        # take a look at the SAMLResponse; pure diagnostics, so a form
        # without that field must not break the flow
        saml_response = form_data.get('SAMLResponse')
        if saml_response:
            pretty_xml = xml.dom.minidom.parseString(
                base64.b64decode(saml_response)).toprettyxml()
            for line in pretty_xml.splitlines():
                if line.strip():
                    log.debug('SAML Response: {}'.format(line))

    # post the form, but w/o automatic redirects, b/c we want to be able to intercept errors
    log.debug(
        'auth code grant flow (Cisco SSO, {}): Submit hidden form to {}'.
        format(user_id, form_action))
    response = self.session.post(form_action,
                                 data=form_data,
                                 allow_redirects=False)
    response = self._follow_redirects(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on GET(12): {} {}'.format(
            response.status_code, response.reason))
    return response
def auth_code_grant_flow(self, user_info, client_info, scope='webexsquare:admin'):
    '''
    Execute an OAuth Authorization Code Grant Flow by emulating the browser.

    Steps:
      1. GET the 'authorize' endpoint
      2. if a sign in page is returned: post the email address, then either
         post the credentials directly or run the cisco.com SSO flow
      3. post the 'accept' decision on the authorization form
      4. follow the redirects up to the redirect URI of the client and pick
         the authorization code from the query string

    Parameters:
        user_info: mapping with non-empty 'email', 'id' and 'password'.
        client_info: mapping with non-empty 'id', 'redirect_uri' and 'secret'.
        scope: the scope to request.

    Return:
        the authorization code (string).

    Raises:
        AssertionError, KeyError: if user_info or client_info lack a required value.
        FlowError: if a page, form or redirect does not look as expected.
    '''
    assert user_info['email']
    assert user_info['id']
    assert user_info['password']
    assert client_info['id']
    assert client_info['redirect_uri']
    assert client_info['secret']

    sign_in_title = 'Sign In - Cisco WebEx'

    # we try to use the Authorization Code Grant Flow
    endpoint = self.endpoint('authorize')

    # random state
    flow_state = str(uuid.uuid4())

    data = Struct()
    data.response_type = 'code'
    data.state = flow_state
    data.client_id = client_info['id']
    data.redirect_uri = client_info['redirect_uri']
    data.scope = scope

    log.debug('auth code grant flow: access endpoint {}'.format(endpoint))
    response = self.session.get(endpoint, params=data.get_dict())
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on GET(1): {} {}'.format(
            response.status_code, response.reason))

    # after a number of redirects this gets us to a page on which we need to enter an email address
    # The title is "Sign In - Cisco WebEx"
    # if we still have a valid session cookie we might actually get to the OAuth2 authorization page directly
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    title = soup.find('title')
    page_title = title.text.strip() if title else ''
    if page_title not in [sign_in_title, 'OAuth2 Authorization - Cisco WebEx']:
        raise FlowError('Didn\'t find expected title')

    if page_title == sign_in_title:
        # Need to sign in.
        log.debug(
            'auth code grant flow: found expected \'Sign In - Cisco WebEx\'')
        # This form is part of the reply:
        #   <form name="GlobalEmailLookup" id="GlobalEmailLookupForm" method="post" action="/idb/globalLogin">
        #       <input type="hidden" id="email" name="email" value=""></input>
        #       <input type="hidden" id="isCookie" name="isCookie" value="false"></input>
        #       <input type="hidden" name="gotoUrl" value="aHR0cHM6Ly9pZGJyb2tlci53ZWJleC5jb20v..." />
        #       <input type="hidden" id="encodedParamsString" name="encodedParamsString" value="dHlwZT1sb2dpbg==" />
        #   </form>
        # A POST with the email address to that form is the next step
        form = soup.find(id='GlobalEmailLookupForm')
        if not form:
            raise FlowError(
                'Couldn\'t find form \'GlobalEmailLookupForm\' to post user\'s email address'
            )
        inputs = form.find_all('input')
        if not inputs:
            raise FlowError(
                'No input fields found in form \'GlobalEmailLookupForm\'')

        # 1st input is the email address
        inputs[0]['value'] = user_info['email']
        form_data = {i['name']: i['value'] for i in inputs}
        form_action = urllib.parse.urljoin(
            response.request.url,
            form.get('action',
                     urllib.parse.urlparse(response.request.url).path))

        log.debug(
            'auth code grant flow: Posting email address {} to form {}'.
            format(user_info['email'], form_action))
        response = self.session.post(form_action, data=form_data)
        dump_response(response)

        # For CIS users this redirects us to a page with title "Sign In - Cisco WebEx"
        log.debug(
            'auth code grant flow: Checking for title \'Sign In - Cisco WebEx\''
        )
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        title = soup.find('title')
        if title and title.text.strip() == sign_in_title:
            # Identified the form to directly enter credentials
            dump_response(response)

            # search for the form with name 'Login'
            form = soup.find(lambda tag: tag.name == 'form' and tag.get(
                'name', '') == 'Login')
            if not form:
                raise FlowError(
                    'Couldn\'t find form \'Login\' to post credentials')
            inputs = form.find_all('input')
            form_data = {i['name']: i['value'] for i in inputs}
            form_data['IDToken0'] = ''
            form_data['IDToken1'] = user_info['email']
            form_data['IDToken2'] = user_info['password']
            form_data['IDButton'] = 'Sign In'
            form_action = urllib.parse.urljoin(
                response.request.url,
                form.get('action',
                         urllib.parse.urlparse(response.request.url).path))

            log.debug(
                'auth code grant flow: Found title \'Sign In - Cisco WebEx\'. Posting credentials to {}'
                .format(form_action))
            response = self.session.post(form_action, data=form_data)
            dump_response(response)
        else:
            # authentication of a cisco.com SSO enabled user requires multiple steps (SAML 2.0 REDIRECT/POST flow with some javascript ...
            response = self._cisco_sso_user_auth(response, user_info['id'],
                                                 user_info['password'])

    # this now is a form where we are requested to grant the requested access
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(13)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.url,
        form.get('action', urllib.parse.urlparse(response.url).path))
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(14)')

    # compile the form data
    # the form basically has few hidden fields and the "decision" field needs to be set to "accept"
    form_data = {
        inp['name']: inp['value']
        for inp in inputs if inp['type'] == 'hidden'
    }
    form_data['decision'] = 'accept'

    # Again post, but no automatic redirects
    log.debug(
        'auth code grant flow: Granting access to client by posting \'accept\' decision'
    )
    response = self.session.post(form_action,
                                 data=form_data,
                                 allow_redirects=False)

    # follow redirects, but stop at client redirect URI; this allows to use non-existing redirect URIs
    response = self._follow_redirects(response, client_info['redirect_uri'])

    # from here on response is the parsed query string of the intercepted redirect (or None)
    if not response:
        raise FlowError('Failed to get OAuth authorization code')

    # a redirect w/o state or code must not surface as a KeyError
    returned_state = response.get('state', [None])[0]
    if returned_state != flow_state:
        raise FlowError(
            'State has been tampered with?!. Got ({}), expected ({})'.
            format(returned_state, flow_state))

    codes = response.get('code')
    if not codes:
        raise FlowError('Failed to get OAuth authorization code')
    return codes[0]
def _cisco_sso_user_auth(self, response, user_id, user_password):
    '''
    Execute the full web browser flow for a cisco.com SSO enabled user.

    Steps:
      1. submit the hidden form (SAMLRequest) found in response
      2. post the user credentials to the login form (connection errors are
         retried, redirects are followed manually)
      3. GET the resume URL taken from the 'resumePath' query parameter
      4. submit the hidden form (SAMLResponse) and follow the redirects

    Parameters:
        response: response carrying the initial hidden form.
        user_id: value for the 1st input field of the login form.
        user_password: value for the 2nd input field of the login form.

    Return:
        the response after the authentication flow. The response typically
        will be the form asking for authorization for the client.

    Raises:
        FlowError: on a missing form or input fields, an unexpected status
            code, an authentication warning on the page or a missing resume path.
        requests.exceptions.ConnectionError: if posting the credentials or
            getting the resume URL fails 5 times in a row.
    '''
    # Form based authentication for cisco.com SSO enabled user
    # this gets us a hidden form which we need to submit
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(2)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.request.url,
        form.get('action', urllib.parse.urlparse(response.request.url).path))

    # There should be a few input fields carrying RelayState and SAMLRequest
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(3)')

    # compile the form data
    form_data = {
        inp['name']: inp['value']
        for inp in inputs if inp['type'] != 'submit'
    }

    # Try to post the form
    log.debug(
        'auth code grant flow (Cisco SSO, {}): submit hidden form with SAMLRequest to {}'
        .format(user_id, form_action))
    response = self.session.post(form_action, data=form_data)
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on POST(4): {} {}'.format(
            response.status_code, response.reason))

    # this gets us to a page where the CEC credentials need to be entered
    # Now we should be at the point where we use form based authentication
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(5)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.request.url,
        form.get('action', urllib.parse.urlparse(response.url).path))
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(6)')
    if len(inputs) < 2:
        # we need one field for the user and one for the password
        raise FlowError('Expected user and password input fields(6)')

    # compile the form data
    # we assume that the 1st two fields are user and password
    form_data = {
        inp['name']: inp['value']
        for inp in inputs[2:] if inp['type'] != 'submit'
    }
    form_data[inputs[0]['name']] = user_id
    form_data[inputs[1]['name']] = user_password

    # Try to post the form, w/o redirects; location has trailing spaces which the requests module does not strip
    log.debug(
        'auth code grant flow (Cisco SSO, {}): Posting credentials to {}'.
        format(user_id, form_action))
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            response = self.session.post(form_action,
                                         data=form_data,
                                         allow_redirects=False)
        except requests.exceptions.ConnectionError:
            if attempt == max_attempts:
                # Give up. Silently continuing here would process the stale
                # login page as if the credentials had been posted.
                raise
            time.sleep(1)
            continue
        break
    response = self._follow_redirects(response)
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on POST(7): {} {}'.format(
            response.status_code, response.reason))

    # let's check for an error message
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    warn_msg = soup.find(id='warning-msg')
    if warn_msg:
        raise FlowError('Authentication problem: \n{}'.format(
            warn_msg.text.strip()))

    # this gets us to a page with some JavaScript code which resumes somewhere
    q = urllib.parse.parse_qs(urllib.parse.urlparse(response.url).query,
                              keep_blank_values=True)
    if 'resumePath' not in q:
        raise FlowError(
            'Could not find resume path in query string: {}'.format(
                response.url))
    resume_url = 'https://cloudsso.cisco.com' + q['resumePath'][0]
    log.debug(
        'auth code grant flow (Cisco SSO, {}): Resume flow. Get on {}'.
        format(user_id, resume_url))
    retries = 0
    while True:
        try:
            response = self.session.get(resume_url)
        except requests.exceptions.ConnectionError:
            retries += 1
            if retries >= 5:
                raise
            time.sleep(1)
            continue
        break
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on GET(8): {} {}'.format(
            response.status_code, response.reason))

    # this returns a page with <body onload="javascript:document.forms[0].submit()">
    # So we again need to look at the embedded form
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(9)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.url,
        form.get('action', urllib.parse.urlparse(response.url).path))

    # There should be a few input fields carrying RelayState and SAMLResponse
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(10)')

    # compile the form data
    form_data = {
        inp['name']: inp['value']
        for inp in inputs if inp['type'] != 'submit'
    }

    if log.isEnabledFor(logging.DEBUG):
        # take a look at the SAMLResponse; pure diagnostics, so a form
        # without that field must not break the flow
        saml_response = form_data.get('SAMLResponse')
        if saml_response:
            pretty_xml = xml.dom.minidom.parseString(
                base64.b64decode(saml_response)).toprettyxml()
            for line in pretty_xml.splitlines():
                if line.strip():
                    log.debug('SAML Response: {}'.format(line))

    # post the form, but w/o automatic redirects, b/c we want to be able to intercept errors
    log.debug(
        'auth code grant flow (Cisco SSO, {}): Submit hidden form to {}'.
        format(user_id, form_action))
    response = self.session.post(form_action,
                                 data=form_data,
                                 allow_redirects=False)
    response = self._follow_redirects(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on GET(12): {} {}'.format(
            response.status_code, response.reason))
    return response
def auth_code_grant_flow(self, user_info, client_info, scope='webexsquare:admin'):
    '''
    Execute an OAuth Authorization Code Grant Flow by emulating the browser.

    Steps:
      1. GET the 'authorize' endpoint
      2. if a sign in page is returned: post the email address, then either
         post the credentials directly or run the cisco.com SSO flow
      3. post the 'accept' decision on the authorization form
      4. follow the redirects up to the redirect URI of the client and pick
         the authorization code from the query string

    Parameters:
        user_info: mapping with non-empty 'email', 'id' and 'password'.
        client_info: mapping with non-empty 'id', 'redirect_uri' and 'secret'.
        scope: the scope to request.

    Return:
        the authorization code (string).

    Raises:
        AssertionError, KeyError: if user_info or client_info lack a required value.
        FlowError: if a page, form or redirect does not look as expected.
    '''
    assert user_info['email']
    assert user_info['id']
    assert user_info['password']
    assert client_info['id']
    assert client_info['redirect_uri']
    assert client_info['secret']

    sign_in_title = 'Sign In - Cisco WebEx'

    # we try to use the Authorization Code Grant Flow
    endpoint = self.endpoint('authorize')

    # random state
    flow_state = str(uuid.uuid4())

    data = Struct()
    data.response_type = 'code'
    data.state = flow_state
    data.client_id = client_info['id']
    data.redirect_uri = client_info['redirect_uri']
    data.scope = scope

    log.debug('auth code grant flow: access endpoint {}'.format(endpoint))
    response = self.session.get(endpoint, params=data.get_dict())
    dump_response(response)
    if response.status_code != 200:
        raise FlowError('Unexpected status code on GET(1): {} {}'.format(
            response.status_code, response.reason))

    # after a number of redirects this gets us to a page on which we need to enter an email address
    # The title is "Sign In - Cisco WebEx"
    # if we still have a valid session cookie we might actually get to the OAuth2 authorization page directly
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    title = soup.find('title')
    page_title = title.text.strip() if title else ''
    if page_title not in [sign_in_title, 'OAuth2 Authorization - Cisco WebEx']:
        raise FlowError('Didn\'t find expected title')

    if page_title == sign_in_title:
        # Need to sign in.
        log.debug(
            'auth code grant flow: found expected \'Sign In - Cisco WebEx\'')
        # This form is part of the reply:
        #   <form name="GlobalEmailLookup" id="GlobalEmailLookupForm" method="post" action="/idb/globalLogin">
        #       <input type="hidden" id="email" name="email" value=""></input>
        #       <input type="hidden" id="isCookie" name="isCookie" value="false"></input>
        #       <input type="hidden" name="gotoUrl" value="aHR0cHM6Ly9pZGJyb2tlci53ZWJleC5jb20v..." />
        #       <input type="hidden" id="encodedParamsString" name="encodedParamsString" value="dHlwZT1sb2dpbg==" />
        #   </form>
        # A POST with the email address to that form is the next step
        form = soup.find(id='GlobalEmailLookupForm')
        if not form:
            raise FlowError(
                'Couldn\'t find form \'GlobalEmailLookupForm\' to post user\'s email address'
            )
        inputs = form.find_all('input')
        if not inputs:
            raise FlowError(
                'No input fields found in form \'GlobalEmailLookupForm\'')

        # 1st input is the email address
        inputs[0]['value'] = user_info['email']
        form_data = {i['name']: i['value'] for i in inputs}
        form_action = urllib.parse.urljoin(
            response.request.url,
            form.get('action',
                     urllib.parse.urlparse(response.request.url).path))

        log.debug(
            'auth code grant flow: Posting email address {} to form {}'.
            format(user_info['email'], form_action))
        response = self.session.post(form_action, data=form_data)
        dump_response(response)

        # For CIS users this redirects us to a page with title "Sign In - Cisco WebEx"
        log.debug(
            'auth code grant flow: Checking for title \'Sign In - Cisco WebEx\''
        )
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        title = soup.find('title')
        if title and title.text.strip() == sign_in_title:
            # Identified the form to directly enter credentials
            dump_response(response)

            # search for the form with name 'Login'
            form = soup.find(lambda tag: tag.name == 'form' and tag.get(
                'name', '') == 'Login')
            if not form:
                raise FlowError(
                    'Couldn\'t find form \'Login\' to post credentials')
            inputs = form.find_all('input')
            form_data = {i['name']: i['value'] for i in inputs}
            form_data['IDToken0'] = ''
            form_data['IDToken1'] = user_info['email']
            form_data['IDToken2'] = user_info['password']
            form_data['IDButton'] = 'Sign In'
            form_action = urllib.parse.urljoin(
                response.request.url,
                form.get('action',
                         urllib.parse.urlparse(response.request.url).path))

            log.debug(
                'auth code grant flow: Found title \'Sign In - Cisco WebEx\'. Posting credentials to {}'
                .format(form_action))
            response = self.session.post(form_action, data=form_data)
            dump_response(response)
        else:
            # authentication of a cisco.com SSO enabled user requires multiple steps (SAML 2.0 REDIRECT/POST flow with some javascript ...
            response = self._cisco_sso_user_auth(response, user_info['id'],
                                                 user_info['password'])

    # this now is a form where we are requested to grant the requested access
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    form = soup.find('form')
    if not form:
        raise FlowError('No form found(13)')

    # the action tag has a URL to be used for the form action. The full URL uses the same base as the request URL
    form_action = urllib.parse.urljoin(
        response.url,
        form.get('action', urllib.parse.urlparse(response.url).path))
    inputs = form.find_all('input')
    if not inputs:
        raise FlowError('No input fields found(14)')

    # compile the form data
    # the form basically has few hidden fields and the "decision" field needs to be set to "accept"
    form_data = {
        inp['name']: inp['value']
        for inp in inputs if inp['type'] == 'hidden'
    }
    form_data['decision'] = 'accept'

    # Again post, but no automatic redirects
    log.debug(
        'auth code grant flow: Granting access to client by posting \'accept\' decision'
    )
    response = self.session.post(form_action,
                                 data=form_data,
                                 allow_redirects=False)

    # follow redirects, but stop at client redirect URI; this allows to use non-existing redirect URIs
    response = self._follow_redirects(response, client_info['redirect_uri'])

    # from here on response is the parsed query string of the intercepted redirect (or None)
    if not response:
        raise FlowError('Failed to get OAuth authorization code')

    # a redirect w/o state or code must not surface as a KeyError
    returned_state = response.get('state', [None])[0]
    if returned_state != flow_state:
        raise FlowError(
            'State has been tampered with?!. Got ({}), expected ({})'.
            format(returned_state, flow_state))

    codes = response.get('code')
    if not codes:
        raise FlowError('Failed to get OAuth authorization code')
    return codes[0]
def get_attachments():
    ''' Download all new attachments from the user's Spark rooms to local folders.

    Reads the credentials from 'spark.ini' and the target base directory
    (section 'path', option 'base') from an .ini file named after this
    script. For every room with new activity the messages carrying files
    are listed and each attachment is stored in a folder named after the
    room. Progress (folder name per room, downloaded attachments per
    message, last seen activity per room) is persisted in a .json file
    named after this script, so that subsequent runs only fetch what is new.
    '''

    class DownloadError(Exception):
        ''' raised when an attachment can not be downloaded '''

    def assert_folder(p_state, base_path, room_id, room_folder):
        ''' make sure that the folder is created for the room

        p_state: persistent state; maps room IDs to the per-room state dict
        base_path: directory below which all room folders are created
        room_id: ID of the room
        room_folder: desired folder name for the room

        returns: the folder name actually used for the room. This may differ
            from room_folder if that name is already taken by another room.
        '''
        if not os.path.lexists(base_path):
            # base directory needs to be created
            logging.debug('Base directory %s does not exist' % base_path)
            os.mkdir(base_path)
        full_path = os.path.join(base_path, room_folder)
        room_state = p_state.setdefault(room_id, {})

        if 'folder' not in room_state:
            logging.debug('No previous folder for room %s' % room_folder)
            # the folder for this room hasn't been created before
            i = 0
            base_folder = room_folder
            while True:
                full_path = os.path.join(base_path, room_folder)
                try:
                    os.mkdir(full_path)
                except FileExistsError:
                    # Folder exists, but not for this room?
                    # Try to find the room the folder has been created for
                    logging.debug('Folder {} already exists'.format(full_path))
                    taken = any(r.get('folder') == room_folder for r in p_state.values())
                    if taken:
                        # we need to come up with a different folder name for the new folder
                        i = i + 1
                        room_folder = base_folder + str(i)
                        logging.debug('Room folder {} belongs to identified. Creating alternate name {} for new folder'.format(full_path, room_folder))
                        continue
                    # this folder seems to be stale: move it out of the way
                    logging.debug('Folder {} seems to belong to no room. Renaming to {}'.format(full_path, full_path + '.stale'))
                    os.rename(full_path, full_path + '.stale')
                    os.mkdir(full_path)
                logging.debug('Created folder %s' % full_path)
                break
            # remember the folder name for this room
            room_state['folder'] = room_folder
        else:
            # has the folder name been changed?
            if room_folder != room_state['folder']:
                logging.debug('Room name (folder) for room %s changed from %s to %s' % (room_id, room_state['folder'], room_folder))
                old_full_path = os.path.join(base_path, room_state['folder'])
                logging.debug('Renaming %s to %s' % (old_full_path, full_path))
                try:
                    os.rename(old_full_path, full_path)
                except FileNotFoundError:
                    logging.warning('Tried to rename folder {} but the folder did not exist'.format(old_full_path))
                    if os.path.lexists(full_path):
                        logging.warning('New folder {} exists. Assuming this is the correct folder'.format(full_path))
                    else:
                        logging.warning('New folder also does not exist. Potentially lost state!?')
                room_state['folder'] = room_folder
            if not os.path.lexists(full_path):
                logging.debug('Folder %s does not exist and will be created' % full_path)
                os.mkdir(full_path)
        # we might have changed the folder name. So we return the potentially updated value
        return room_folder

    def copy_attachment(p_state, base_path, room_id, room_folder, message, attachment_index, file_name, response):
        ''' read the attachment to a file

        p_state: persistent state; p_state[room_id] has to exist already
        base_path, room_folder: the file is written to base_path/room_folder
        message: the message the attachment belongs to ('id' and 'created' are used)
        attachment_index: index of the attachment within the message
        file_name: file name as announced by the server
        response: streamed response; the content is read from response.raw

        The file name used for each attachment is recorded in the message
        state. An attachment which is already recorded is not written again.
        '''
        message_id = message['id']
        message_created = message['created']

        # the name is supplied by the server: never let it point outside of the room folder
        file_name = os.path.basename(file_name)
        # remove whitespaces from file_name
        (base, ext) = os.path.splitext(file_name)
        file_name = (base.strip() + ext.strip()).strip()

        full_path = os.path.join(base_path, room_folder)
        full_name = os.path.join(full_path, file_name)

        room_state = p_state[room_id]
        if 'messages' not in room_state:
            logging.debug('Initialize message state in room state')
            room_state['messages'] = {}
        messages_state = room_state['messages']
        if message_id not in messages_state:
            logging.debug('Initialize message state for message %s from %s' % (message_id, str_to_datetime(message_created).isoformat()))
            messages_state[message_id] = {'created': message_created}
        message_state = messages_state[message_id]

        attachment_index = str(attachment_index).strip()
        if attachment_index in message_state:
            logging.debug('Attachment already downloaded. Message %s from %s, index %s, file \'%s\' as \'%s\'' % (message_id, message_created, attachment_index, file_name, message_state[attachment_index]))
            logging.info(' Already downloaded. Skipping file...')
            return

        logging.debug('New attachment. Message %s from %s, index %s, file \'%s\'' % (message_id, message_created, attachment_index, file_name))
        if os.path.exists(full_name):
            logging.debug('File \'%s\' already exists' % file_name)
            # Find the message and index which currently uses this name
            # The Mac OS X file system in case preserving but case insensitive so "attachment.png" and "Attachment.png" are the 'same'
            # we have to consider that when searching for the message which references to a given file name: the check needs to be case insensitive
            owner = next(((ms, idx)
                          for ms in messages_state.values()
                          for idx in ms
                          if ms[idx].lower() == file_name.lower()),
                         None)
            if owner is None:
                logging.warning('File \'%s\' exists, but message this attachment belongs to could not be found' % full_name)
                logging.warning('.. renaming to {}'.format(full_name + '.stale'))
                os.rename(full_name, full_name + '.stale')
            else:
                ms, idx = owner
                logging.debug('Existing file \'%s\' belongs to message from %s' % (ms[idx], str_to_datetime(ms['created']).isoformat()))
                if message_created > ms['created']:
                    # the older file needs to be renamed
                    logging.debug('This attachment seems to be newer. This: %s, existing: %s' % (str_to_datetime(message_created).isoformat(), str_to_datetime(ms['created']).isoformat()))
                    logging.debug('Existing file needs to be renamed')
                    (base, ext) = os.path.splitext(ms[idx])
                    # NOTE(review): the suffix uses the index of the new attachment, not idx of the
                    # existing one. Kept as is to not change the naming of files - confirm intent.
                    new_name = base + '_' + str_to_datetime(ms['created']).strftime('%Y%m%d%H%M%S') + '-' + str(attachment_index).strip() + ext
                    logging.debug('File will be renamed to \'%s\'' % new_name)
                    os.rename(os.path.join(full_path, ms[idx]), os.path.join(full_path, new_name))
                    ms[idx] = new_name
                else:
                    logging.debug('This attachment seems to be older. This: %s, existing: %s' % (str_to_datetime(message_created).isoformat(), str_to_datetime(ms['created']).isoformat()))
                    logging.debug('This attachment needs to be saved under a different name')
                    (base, ext) = os.path.splitext(file_name)
                    file_name = base + '_' + str_to_datetime(message_created).strftime('%Y%m%d%H%M%S') + '-' + str(attachment_index).strip() + ext
                    full_name = os.path.join(base_path, room_folder, file_name)
                    logging.debug('Attachment will be saved as %s instead' % file_name)
        else:
            # the file does not exist. For sanity reasons remove all references to attachments with the same name from the message state
            # reason: user might have "cleaned up" the attachment repository on the file system and deleted a file
            for ms in messages_state.values():
                # iterate over a copy of the keys: entries are deleted while iterating
                for idx in list(ms.keys()):
                    if ms[idx].lower() == file_name.lower():
                        logging.debug('Found stale message state for file %s from %s. Removing state..' % (ms[idx], str_to_datetime(ms['created']).isoformat()))
                        del ms[idx]

        # now finally copy the file
        logging.info(' Downloading attachment to \'%s\'' % full_name)
        with open(full_name, 'wb') as f:
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, f)
        # set access and last modified date
        f_time = str_to_datetime(message_created).timestamp()
        os.utime(full_name, (f_time, f_time))
        # only record the attachment after the file has been written completely
        message_state[attachment_index] = file_name

    def check_new_activity(p_state, room):
        ''' check whether there is new activity in the room

        returns:
            None - no new activity
            '' - all messages in the room are new
            <datestring> - date/time of last activity. Only newer activities need to be considered
        '''
        room_id = room['id']
        last_activity = room['lastActivity']
        if room_id not in p_state:
            logging.debug('New activity in room. Room never tested before')
            return ''
        last_seen = p_state[room_id].get('lastActivity', '')
        if last_activity != last_seen:
            logging.debug('New activity in room: last seen %s, now %s' % (last_seen, last_activity))
            return last_seen
        logging.debug('No new activity in room: last seen %s' % last_seen)
        return None

    def set_last_activity(p_state, room, activity):
        ''' sets 'lastActivity' for the given room in p_state '''
        logging.debug('Setting last activity for room to: {}'.format(activity))
        p_state.setdefault(room['id'], {})['lastActivity'] = activity

    def save_state(p_state, state_file):
        ''' write the persistent state as JSON to state_file '''
        logging.debug('Saving state to file %s' % state_file)
        with open(state_file, 'w') as f:
            json.dump(p_state, f, indent=4)

    def get_messages_with_attachments(room_id, last_activity):
        ''' get all messages with attachment of given room newer than last_activity

        Uses the 'spark' API object created further down in the enclosing function.
        '''
        # if we never read the room try to read messages in bigger chunks
        max_messages = 200 if not last_activity else 50
        for m in spark.list_messages(room_id, p_max=max_messages):
            if m['created'] <= last_activity:
                logging.debug('Got last message after last checked activity. Last activity %s, this message %s' % (str_to_datetime(last_activity).isoformat(), str_to_datetime(m['created']).isoformat()))
                break
            if 'files' in m:
                # only collect messages with attachments
                yield m

    def request_attachment(attachment_index, attachment, room_folder, message_created):
        ''' request the attachment and return (response, content-disposition header)

        Sometimes a JSON error message is returned instead of the attachment.
        In that case the request is retried with an exponential back-off.

        raises DownloadError if the attachment can not be obtained. The
            response is closed before the exception is raised.
        '''
        dump_logger = logging.getLogger('dump_utilities')
        back_off = 1
        while True:
            logging.debug(' Getting attachment {} from {}'.format(attachment_index, attachment))
            # we set the dump_utilities log level to INFO to avoid hick-ups from trying to log the content
            # the log level is set back to the original value even if the request fails
            level = dump_logger.getEffectiveLevel()
            dump_logger.setLevel(logging.INFO)
            try:
                response = spark.get(attachment, stream=True)
            finally:
                dump_logger.setLevel(level)
            dump_response(response, dump_body=False)

            cd_header = response.headers.get('content-disposition', None)
            if cd_header is not None:
                return response, cd_header

            # sometimes we don't get the attachment and instead a JSON error message is returned
            try:
                js = response.json()
                logging.error('Error downloading from room {}, time {}, error message: {}'.format(room_folder, message_created.isoformat(), js.get('message', 'Unknown problem: %s' % js)))
            except Exception:
                # not even a usable JSON error message: retrying is pointless
                logging.error('Error downloading from room {}, time {}. No content-disposition header and no JSON found. Headers: {}'.format(room_folder, message_created.isoformat(), response.headers))
                response.close()
                raise DownloadError
            response.close()
            if back_off > 32:
                raise DownloadError
            logging.info(' Waiting for {} seconds before retrying...'.format(back_off))
            time.sleep(back_off)
            back_off = back_off * 2

    setup_logging()

    spark_config = configparser.ConfigParser()
    spark_config.read('spark.ini')
    set_mask_password(spark_config['user']['password'])

    ib = SparkDevIdentityBroker()
    oauth_token = OAuthToken(ib, spark_config['user'], spark_config['client'])
    spark = spark_api.SparkAPI(oauth_token)

    att_config = configparser.ConfigParser()
    att_config.read(os.path.splitext(__file__)[0] + '.ini')
    base_path = os.path.abspath(os.path.expanduser(att_config['path']['base']))

    state_file = os.path.splitext(__file__)[0] + '.json'
    try:
        f = open(state_file, 'r')
    except IOError:
        logging.debug('Did not find saved state in file %s' % state_file)
        p_state = {}
    else:
        # errors while reading or parsing an existing state file are not swallowed:
        # otherwise the existing state would be overwritten with an empty one
        with f:
            logging.debug('Reading saved state from file %s' % state_file)
            p_state = json.load(f)

    logging.info('Getting list of rooms...')
    try:
        rooms = list(spark.list_rooms())
    except spark_api.APIError as e:
        try:
            logging.error('Error getting rooms: %s' % e.args[2]['message'])
        except Exception:
            logging.error('Error getting rooms: %s' % e.args[2])
        rooms = []
    logging.info('Found {} rooms'.format(len(rooms)))

    try:
        for room in rooms:
            room_id = room['id']
            # in case the room doesn't have a title we use the room ID as fallback
            room_folder = valid_filename(room.get('title', room_id))
            logging.info('Checking room \'%s\'' % room_folder)
            logging.debug('ID: %s, %s' % (room_id, spark_api.base64_id_to_str(room_id)))

            last_activity = check_new_activity(p_state, room)
            if last_activity is None:
                logging.info('No new activity. Skipping room')
                continue

            # iterate through all messages with attachments
            try:
                for message in get_messages_with_attachments(room_id, last_activity):
                    message_created = str_to_datetime(message['created'])
                    logging.info(' %s: Message with %s attachments.' % (message_created.isoformat(), len(message['files'])))
                    for attachment_index, attachment in enumerate(message['files']):
                        try:
                            response, cd_header = request_attachment(attachment_index, attachment, room_folder, message_created)
                        except DownloadError:
                            # NOTE(review): this also skips the remaining attachments of the message - confirm intent
                            break
                        try:
                            # NOTE: the cgi module is deprecated (PEP 594) and removed in Python 3.13
                            _, params = cgi.parse_header(cd_header)
                            file_name = params['filename']
                            size = response.headers.get('content-length', None)
                            size = 'n/a' if size is None else int(size)
                            logging.info(' File \'%s\', length: %s' % (file_name, size))

                            # copy the file to the appropriate folder
                            room_folder = assert_folder(p_state, base_path, room_id, room_folder)
                            copy_attachment(p_state, base_path, room_id, room_folder, message, attachment_index, file_name, response)
                        finally:
                            # release the connection even if storing the attachment failed
                            response.close()
                    # when done with a message set the last activity state for the current room
                    # NOTE(review): the generator above stops at the first old message, so messages
                    # presumably arrive newest first. If so an interrupted run leaves a checkpoint
                    # which hides older messages not processed yet - confirm.
                    set_last_activity(p_state, room, message['created'])
                # when done with all message in the room set the last activity state for the current_room
                set_last_activity(p_state, room, room['lastActivity'])
            except spark_api.APIError as e:
                try:
                    logging.error('Error getting messages from room %s: %s' % (room_folder, e.info.get('message', 'unknown error')))
                except Exception:
                    logging.error('Error getting messages from room %s: %s' % (room_folder, e.info))
        save_state(p_state, state_file)
    except Exception:
        # keep what has been achieved so far before giving up
        save_state(p_state, state_file)
        raise

    # Setting the last modified date of the folders in line with the latest attachment in the room is a nice idea
    for room_state in (r for r in p_state.values() if 'folder' in r):
        folder = os.path.join(base_path, room_state['folder'])
        dates = [m['created'] for m in room_state.get('messages', {}).values()]
        if not dates:
            # a folder without any recorded message: there is no date to apply
            logging.debug('No messages recorded for folder %s. Timestamp not changed' % folder)
            continue
        f_time = str_to_datetime(max(dates)).timestamp()
        try:
            os.utime(folder, (f_time, f_time))
        except Exception as e:
            logging.error('Error setting timestamp of folder {}:{}'.format(folder, e))