class BlackboardSession:
    """HTTP session against Blackboard (bb.au.dk) with automatic WAYF login.

    Wraps a ``requests.Session`` whose cookies are mirrored to an
    ``LWPCookieJar`` file.  Pages fetched through :meth:`get` have their
    HTML/JavaScript redirects followed and, when the WAYF login form is
    reached, the stored credentials are submitted.
    """

    def __init__(self, cookiejar, username, course_id):
        """Create the session and load previously saved cookies.

        Parameters
        ----------
        cookiejar : str
            Filename of the LWP cookie jar.
        username : str or None
            WAYF username; asked for interactively on first use if None.
        course_id : str
            Course id inserted into course-specific URLs.
        """
        self.cookiejar_filename = cookiejar
        self.username = username
        self.course_id = course_id
        self.password = None
        self.cookies = LWPCookieJar(cookiejar)
        self.session = requests.Session()
        self.load_cookies()

    def load_cookies(self):
        """Load the cookie jar file (if any) into the requests session."""
        try:
            self.cookies.load(ignore_discard=True)
        except FileNotFoundError:
            # No jar has been saved yet; start with an empty one.
            pass
        requests.cookies.merge_cookies(self.session.cookies, self.cookies)

    def save_cookies(self):
        """Copy the session cookies into the jar and write it to disk."""
        requests.cookies.merge_cookies(self.cookies, self.session.cookies)
        self.cookies.save(ignore_discard=True)

    def get_cookie(self, key, path):
        """Return the value of cookie `key` for bb.au.dk under `path`.

        Raises
        ------
        KeyError
            If no such cookie exists (the cookie table is printed first).
        """
        try:
            return self.session.cookies._cookies['bb.au.dk'][path][key].value
        except KeyError:
            print(self.session.cookies._cookies)
            raise

    def get_username(self):
        """Ask the user for the WAYF username on standard input."""
        return input("WAYF username: ")

    def get_password(self):
        """Return the password from the keyring, prompting if not stored.

        A password entered at the prompt is saved in the keyring under
        the service name "fetch.py WAYF" for the current username.
        """
        p = keyring.get_password("fetch.py WAYF", self.username)
        if p is None:
            print("Please enter password for %s to store in keyring." %
                  self.username)
            p = getpass.getpass()
            keyring.set_password("fetch.py WAYF", self.username, p)
        return p

    def get_auth(self):
        """Return the login form data, resolving credentials lazily."""
        if self.username is None:
            self.username = self.get_username()
        if self.password is None:
            self.password = self.get_password()
        return dict(username=self.username, password=self.password)

    def forget_password(self):
        """Delete the stored keyring password for the current username.

        Raises
        ------
        ValueError
            If no username is set.
        """
        if self.username is None:
            raise ValueError("forget_password: username is None")
        keyring.delete_password("fetch.py WAYF", self.username)

    def wayf_login(self, response):
        """Login to WAYF.

        Parameters
        ----------
        response : requests.Response
            Login page (with username/password form)

        Returns
        -------
        requests.Response
            Final response, whose ``history`` holds every earlier response.

        Raises
        ------
        BadAuth
            If the page reports a wrong username or password.
        """
        history = list(response.history) + [response]
        logger.info("Sending login details to WAYF")
        response = self.session.post(response.url, self.get_auth())
        history += list(response.history) + [response]
        # Danish for "Wrong username or password".
        if 'Forkert brugernavn eller kodeord' in response.text:
            raise BadAuth()

        # Two consecutive hidden-field forms are submitted after the login.
        response = self.post_hidden_form(response)
        history += list(response.history) + [response]
        response = self.post_hidden_form(response)
        history += list(response.history) + [response]
        response.history = history[:-1]
        return response

    def relogin(self):
        """Fetch the Shibboleth login URL and verify that we end up logged in.

        Raises
        ------
        ParserError
            If the resulting page still looks logged out.
        """
        url = ('https://bb.au.dk/webapps/bb-auth-provider-shibboleth-BBLEARN' +
               '/execute/shibbolethLogin?authProviderId=_102_1')
        response = self.get(url)
        if self.detect_login(response) is False:
            logger.error("Seems logged out after re-login. " +
                         "Try deleting your cookiejar.")
            raise ParserError("Not logged in", response)
        return response

    def detect_login(self, response):
        """Inspect a page for login state.

        Returns
        -------
        bool or None
            False if the logged-out link is present, True if the logout
            link is present, None if neither is found.
        """
        document = html5lib.parse(response.content, encoding=response.encoding)
        logged_out_url = (
            '/webapps/portal/execute/tabs/tabAction?tab_tab_group_id=_21_1')
        o = document.find('.//h:a[@href="%s"]' % logged_out_url, NS)
        if o is not None:
            return False
        log_out_id = 'topframe.logout.label'
        o = document.find('.//h:a[@id="%s"]' % log_out_id, NS)
        if o is not None:
            return True

    def post_hidden_form(self, response):
        """Send POST request to form with only hidden fields.

        Parameters
        ----------
        response : requests.Response
            Page containing form with only hidden fields

        Raises
        ------
        ParserError
            If the page has no form, or the form has no named inputs.
        """
        from urllib.parse import urljoin

        document = html5lib.parse(response.content, encoding=response.encoding)
        form = document.find('.//h:form', NS)
        if form is None:
            raise ParserError("No <form> in page", response)
        # Resolve a relative (or missing) action against the page URL.
        url = urljoin(response.url, form.get('action'))
        inputs = form.findall('.//h:input[@name]', NS)
        if not inputs:
            raise ParserError("No <input> with name", response)
        post_data = {i.get('name'): i.get('value') for i in inputs}
        response = self.session.post(url, post_data)
        return response

    def follow_html_redirect(self, response):
        """Repeatedly follow HTML redirects in the page.

        If the given response has no HTML redirect, return it unaltered.
        Otherwise, return a new response by following the redirects.
        """
        js_redirect_pattern = (r'(?:<!--)?\s*' +
                               r'document\.location\.replace\(\'' +
                               r'(?P<url>(?:\\.|[^\'])+)' +
                               r'\'\);\s*' +
                               r'(?:(?://)?-->)?\s*$')
        real_login_url = (
            'https://bb.au.dk/webapps/' +
            'bb-auth-provider-shibboleth-BBLEARN/execute/shibbolethLogin')
        history = list(response.history) + [response]

        while True:
            document = html5lib.parse(response.content,
                                      encoding=response.encoding)
            scripts = document.findall('.//h:script', NS)

            # The first <script> that consists of a location.replace() call
            # determines the redirect target.
            next_url = None
            for s in scripts:
                t = ''.join(s.itertext())
                mo = re.match(js_redirect_pattern, t)
                if mo:
                    next_url = mo.group('url')
                    break
            if next_url is not None:
                o = urlparse(next_url)
                p = o.netloc + o.path
                if p == 'bb.au.dk/webapps/login/':
                    # Replace the generic login page by the Shibboleth
                    # login URL, carrying the return location along.
                    qs = parse_qs(o.query)
                    try:
                        return_url = qs['new_loc'][0]
                    except KeyError:
                        print("We are being redirected to %r" % (next_url, ))
                        return_url = ''
                    # It seems that making a GET request to this page
                    # logs you out?
                    if return_url == '/webapps/login/?action=relogin':
                        logger.debug("Not setting returnUrl to %r",
                                     return_url)
                        return_url = ''
                    new_qs = urlencode(
                        dict(returnUrl=return_url, authProviderId='_102_1'))
                    next_url = '%s?%s' % (real_login_url, new_qs)
                response = self.session.get(next_url)
                history += list(response.history) + [response]
                continue
            break
        response.history = history[:-1]
        return response

    def autologin(self, response):
        """Automatically log in if necessary.

        If the given response is not for a login form,
        just follow HTML redirects and return the response.
        Otherwise, log in using wayf_login and get_auth.
        """
        response = self.follow_html_redirect(response)
        o = urlparse(response.url)
        if o.netloc == 'wayf.au.dk':
            response = self.wayf_login(response)
        return response

    def get_edit_mode(self, response):
        """Return whether the edit-mode toggle has the 'read-on' class.

        Returns None when the page has no ``editModeToggleLink`` element.
        """
        document = html5lib.parse(response.content, encoding=response.encoding)
        mode_switch = document.find('.//*[@id="editModeToggleLink"]', NS)
        if mode_switch is not None:
            return 'read-on' in (mode_switch.get('class') or '').split()

    def ensure_edit_mode(self, response):
        """Switch the course to designer mode if `get_edit_mode` is False.

        After switching, the first URL in the response history is fetched
        again and returned with the accumulated history attached.
        """
        if self.get_edit_mode(response) is False:
            url = ('https://bb.au.dk/webapps/blackboard/execute/' +
                   'doCourseMenuAction?cmd=setDesignerParticipantViewMode' +
                   '&courseId=' + self.course_id +
                   '&mode=designer')
            logger.debug("Switch to edit mode")
            r = self.get(url)
            history = (list(response.history) + [response] +
                       list(r.history) + [r])
            response = self.get(history[0].url)
            response.history = history + list(response.history)
        return response

    def get(self, url):
        """GET `url`, logging in (again) when the page looks logged out.

        NOTE(review): relogin() itself goes through get(), so a login that
        keeps failing recurses; confirm whether a retry limit is wanted.
        """
        response = self.autologin(self.session.get(url))
        if self.detect_login(response) is False:
            history = response.history + [response]
            relogin_response = self.relogin()
            history += relogin_response.history + [relogin_response]
            response = self.autologin(self.session.get(url))
            response.history = history + list(response.history)
        if response.url != url:
            # We ended up somewhere else; request the wanted URL once more.
            history = list(response.history) + [response]
            response = self.session.get(url)
            response.history = history + list(response.history)
        self.log_error(response)
        return response

    def log_error(self, response):
        """Log when the page's contentPanel div carries the 'error' class."""
        document = html5lib.parse(response.content, encoding=response.encoding)
        content = document.find('.//h:div[@id="contentPanel"]', NS)
        if content is not None:
            class_list = (content.get('class') or '').split()
            if 'error' in class_list:
                logger.info("contentPanel indicates an error has occurred")
                # raise ParserError("Error", response)

    def post(self, url, data, files=None, headers=None):
        """POST through the underlying session; no login handling is done."""
        response = self.session.post(
            url, data=data, files=files, headers=headers)
        return response

    def ensure_logged_in(self):
        """Fetch the course dashboard page so that get() performs any login."""
        url = (
            'https://bb.au.dk/webapps/blackboard/content/manageDashboard.jsp' +
            '?course_id=%s' % self.course_id +
            '&sortCol=LastLoginCol&sortDir=D')
        self.get(url)
class BlackboardSession:
    """HTTP session against Blackboard (bb.au.dk) with automatic WAYF login.

    Wraps a ``requests.Session`` whose cookies are mirrored to an
    ``LWPCookieJar`` file.  Pages fetched through :meth:`get` have their
    HTML/JavaScript redirects followed and, when the WAYF login form is
    reached, the stored credentials are submitted.
    """

    def __init__(self, cookiejar, username, course_id):
        """Create the session and load previously saved cookies.

        Parameters
        ----------
        cookiejar : str
            Filename of the LWP cookie jar.
        username : str or None
            WAYF username; asked for interactively on first use if None.
        course_id : str
            Course id inserted into course-specific URLs.
        """
        self.cookiejar_filename = cookiejar
        self.username = username
        self.course_id = course_id
        self.password = None
        self.cookies = LWPCookieJar(cookiejar)
        self.session = requests.Session()
        self.load_cookies()

    def load_cookies(self):
        """Load the cookie jar file (if any) into the requests session."""
        try:
            self.cookies.load(ignore_discard=True)
        except FileNotFoundError:
            # No jar has been saved yet; start with an empty one.
            pass
        requests.cookies.merge_cookies(self.session.cookies, self.cookies)

    def save_cookies(self):
        """Copy the session cookies into the jar and write it to disk."""
        requests.cookies.merge_cookies(self.cookies, self.session.cookies)
        self.cookies.save(ignore_discard=True)

    def get_cookie(self, key, path):
        """Return the value of cookie `key` for bb.au.dk under `path`.

        Raises
        ------
        KeyError
            If no such cookie exists (the cookie table is printed first).
        """
        try:
            return self.session.cookies._cookies['bb.au.dk'][path][key].value
        except KeyError:
            print(self.session.cookies._cookies)
            raise

    def get_username(self):
        """Ask the user for the WAYF username on standard input."""
        return input("WAYF username: ")

    def get_password(self):
        """Return the password from the keyring, prompting if not stored.

        A password entered at the prompt is saved in the keyring under
        the service name "fetch.py WAYF" for the current username.
        """
        p = keyring.get_password("fetch.py WAYF", self.username)
        if p is None:
            print("Please enter password for %s to store in keyring." %
                  self.username)
            p = getpass.getpass()
            keyring.set_password("fetch.py WAYF", self.username, p)
        return p

    def get_auth(self):
        """Return the login form data, resolving credentials lazily."""
        if self.username is None:
            self.username = self.get_username()
        if self.password is None:
            self.password = self.get_password()
        return dict(username=self.username, password=self.password)

    def forget_password(self):
        """Delete the stored keyring password for the current username.

        Raises
        ------
        ValueError
            If no username is set.
        """
        if self.username is None:
            raise ValueError("forget_password: username is None")
        keyring.delete_password("fetch.py WAYF", self.username)

    def wayf_login(self, response):
        """Login to WAYF.

        Parameters
        ----------
        response : requests.Response
            Login page (with username/password form)

        Returns
        -------
        requests.Response
            Final response, whose ``history`` holds every earlier response.

        Raises
        ------
        BadAuth
            If the page reports a wrong username or password.
        """
        history = list(response.history) + [response]
        logger.info("Sending login details to WAYF")
        response = self.session.post(response.url, self.get_auth())
        history += list(response.history) + [response]
        # Danish for "Wrong username or password".
        if 'Forkert brugernavn eller kodeord' in response.text:
            raise BadAuth()

        # Two consecutive hidden-field forms are submitted after the login.
        response = self.post_hidden_form(response)
        history += list(response.history) + [response]
        response = self.post_hidden_form(response)
        history += list(response.history) + [response]
        response.history = history[:-1]
        return response

    def relogin(self):
        """Fetch the Shibboleth login URL and verify that we end up logged in.

        Raises
        ------
        ParserError
            If the resulting page still looks logged out.
        """
        url = (
            'https://bb.au.dk/webapps/bb-auth-provider-shibboleth-BBLEARN' +
            '/execute/shibbolethLogin?authProviderId=_102_1')
        response = self.get(url)
        if self.detect_login(response) is False:
            logger.error("Seems logged out after re-login. " +
                         "Try deleting your cookiejar.")
            raise ParserError("Not logged in", response)
        return response

    def detect_login(self, response):
        """Inspect a page for login state.

        Returns
        -------
        bool or None
            False if the logged-out link is present, True if the logout
            link is present, None if neither is found.
        """
        document = html5lib.parse(response.content, encoding=response.encoding)
        logged_out_url = (
            '/webapps/portal/execute/tabs/tabAction?tab_tab_group_id=_21_1')
        o = document.find('.//h:a[@href="%s"]' % logged_out_url, NS)
        if o is not None:
            return False
        log_out_id = 'topframe.logout.label'
        o = document.find('.//h:a[@id="%s"]' % log_out_id, NS)
        if o is not None:
            return True

    def post_hidden_form(self, response):
        """Send POST request to form with only hidden fields.

        Parameters
        ----------
        response : requests.Response
            Page containing form with only hidden fields

        Raises
        ------
        ParserError
            If the page has no form, or the form has no named inputs.
        """
        from urllib.parse import urljoin

        document = html5lib.parse(response.content, encoding=response.encoding)
        form = document.find('.//h:form', NS)
        if form is None:
            raise ParserError("No <form> in page", response)
        # Resolve a relative (or missing) action against the page URL.
        url = urljoin(response.url, form.get('action'))
        inputs = form.findall('.//h:input[@name]', NS)
        if not inputs:
            raise ParserError("No <input> with name", response)
        post_data = {
            i.get('name'): i.get('value')
            for i in inputs
        }
        response = self.session.post(url, post_data)
        return response

    def follow_html_redirect(self, response):
        """Repeatedly follow HTML redirects in the page.

        If the given response has no HTML redirect, return it unaltered.
        Otherwise, return a new response by following the redirects.
        """
        js_redirect_pattern = (
            r'(?:<!--)?\s*' +
            r'document\.location\.replace\(\'' +
            r'(?P<url>(?:\\.|[^\'])+)' +
            r'\'\);\s*' +
            r'(?:(?://)?-->)?\s*$')
        real_login_url = (
            'https://bb.au.dk/webapps/' +
            'bb-auth-provider-shibboleth-BBLEARN/execute/shibbolethLogin')
        history = list(response.history) + [response]

        while True:
            document = html5lib.parse(
                response.content, encoding=response.encoding)
            scripts = document.findall('.//h:script', NS)

            # The first <script> that consists of a location.replace() call
            # determines the redirect target.
            next_url = None
            for s in scripts:
                t = ''.join(s.itertext())
                mo = re.match(js_redirect_pattern, t)
                if mo:
                    next_url = mo.group('url')
                    break
            if next_url is not None:
                o = urlparse(next_url)
                p = o.netloc + o.path
                if p == 'bb.au.dk/webapps/login/':
                    # Replace the generic login page by the Shibboleth
                    # login URL, carrying the return location along.
                    qs = parse_qs(o.query)
                    try:
                        return_url = qs['new_loc'][0]
                    except KeyError:
                        print("We are being redirected to %r" % (next_url,))
                        return_url = ''
                    # It seems that making a GET request to this page
                    # logs you out?
                    if return_url == '/webapps/login/?action=relogin':
                        logger.debug(
                            "Not setting returnUrl to %r", return_url)
                        return_url = ''
                    new_qs = urlencode(
                        dict(returnUrl=return_url, authProviderId='_102_1'))
                    next_url = '%s?%s' % (real_login_url, new_qs)
                response = self.session.get(next_url)
                history += list(response.history) + [response]
                continue
            break
        response.history = history[:-1]
        return response

    def autologin(self, response):
        """Automatically log in if necessary.

        If the given response is not for a login form,
        just follow HTML redirects and return the response.
        Otherwise, log in using wayf_login and get_auth.
        """
        response = self.follow_html_redirect(response)
        o = urlparse(response.url)
        if o.netloc == 'wayf.au.dk':
            response = self.wayf_login(response)
        return response

    def get_edit_mode(self, response):
        """Return whether the edit-mode toggle has the 'read-on' class.

        Returns None when the page has no ``editModeToggleLink`` element.
        """
        document = html5lib.parse(response.content, encoding=response.encoding)
        mode_switch = document.find('.//*[@id="editModeToggleLink"]', NS)
        if mode_switch is not None:
            return 'read-on' in (mode_switch.get('class') or '').split()

    def ensure_edit_mode(self, response):
        """Switch the course to designer mode if `get_edit_mode` is False.

        After switching, the first URL in the response history is fetched
        again and returned with the accumulated history attached.
        """
        if self.get_edit_mode(response) is False:
            url = ('https://bb.au.dk/webapps/blackboard/execute/' +
                   'doCourseMenuAction?cmd=setDesignerParticipantViewMode' +
                   '&courseId=' + self.course_id +
                   '&mode=designer')
            logger.debug("Switch to edit mode")
            r = self.get(url)
            history = (list(response.history) + [response] +
                       list(r.history) + [r])
            response = self.get(history[0].url)
            response.history = history + list(response.history)
        return response

    def get(self, url):
        """GET `url`, logging in (again) when the page looks logged out.

        NOTE(review): relogin() itself goes through get(), so a login that
        keeps failing recurses; confirm whether a retry limit is wanted.
        """
        response = self.autologin(self.session.get(url))
        if self.detect_login(response) is False:
            history = response.history + [response]
            relogin_response = self.relogin()
            history += relogin_response.history + [relogin_response]
            response = self.autologin(self.session.get(url))
            response.history = history + list(response.history)
        if response.url != url:
            # We ended up somewhere else; request the wanted URL once more.
            history = list(response.history) + [response]
            response = self.session.get(url)
            response.history = history + list(response.history)
        self.log_error(response)
        return response

    def log_error(self, response):
        """Log when the page's contentPanel div carries the 'error' class."""
        document = html5lib.parse(response.content, encoding=response.encoding)
        content = document.find('.//h:div[@id="contentPanel"]', NS)
        if content is not None:
            class_list = (content.get('class') or '').split()
            if 'error' in class_list:
                logger.info("contentPanel indicates an error has occurred")
                # raise ParserError("Error", response)

    def post(self, url, data, files=None, headers=None):
        """POST through the underlying session; no login handling is done."""
        response = self.session.post(
            url, data=data, files=files, headers=headers)
        return response

    def ensure_logged_in(self):
        """Fetch the course dashboard page so that get() performs any login."""
        url = (
            'https://bb.au.dk/webapps/blackboard/content/manageDashboard.jsp' +
            '?course_id=%s' % self.course_id +
            '&sortCol=LastLoginCol&sortDir=D')
        self.get(url)
class StreamSession(object):
    """
    Top-level stream session interface

    Individual stream providers can be implemented by inheriting from this class
    and implementing methods for login flow, getting streams, etc.

    State (currently the proxy settings) is saved to a per-class YAML
    session file and cookies to a per-class LWP cookie file, both under
    ``config.CONFIG_DIR``.
    """

    HEADERS = {"User-agent": USER_AGENT}

    def __init__(self, provider_id, proxies=None, *args, **kwargs):
        """Create the HTTP session and load cookies from the cookie file.

        :param provider_id: key used to look up the provider in `providers`
        :param proxies: optional proxy mapping applied to the session
        Extra positional and keyword arguments are accepted and ignored.
        """
        self.provider_id = provider_id
        self.session = requests.Session()
        self.cookies = LWPCookieJar()
        # Make sure the cookie file exists so that load() cannot fail.
        if not os.path.exists(self.COOKIES_FILE):
            self.cookies.save(self.COOKIES_FILE)
        self.cookies.load(self.COOKIES_FILE, ignore_discard=True)
        # Copy so that per-session header changes never leak into the
        # class-level HEADERS dict shared by all sessions.
        self.session.headers = dict(self.HEADERS)
        self._state = AttrDict([("proxies", proxies)])
        if proxies:
            self.proxies = proxies
        self._cache_responses = False

    @property
    def provider(self):
        """Provider object registered under this session's provider id."""
        return providers.get(self.provider_id)

    def login(self):
        """Perform the provider login flow; no-op in the base class."""
        pass

    @classmethod
    def session_type(cls):
        """Lower-cased class name with "StreamSession" removed."""
        return cls.__name__.replace("StreamSession", "").lower()

    @classmethod
    def _COOKIES_FILE(cls):
        return os.path.join(config.CONFIG_DIR,
                            f"{cls.session_type()}.cookies")

    @property
    def COOKIES_FILE(self):
        """Path of the cookie file for this session type."""
        return self._COOKIES_FILE()

    @classmethod
    def _SESSION_FILE(cls):
        return os.path.join(config.CONFIG_DIR,
                            f"{cls.session_type()}.session")

    @property
    def SESSION_FILE(self):
        """Path of the YAML state file for this session type."""
        return self._SESSION_FILE()

    @classmethod
    def new(cls, provider_id, *args, **kwargs):
        """Load a saved session, or create a fresh one if that fails."""
        try:
            return cls.load(provider_id, **kwargs)
        except (FileNotFoundError, TypeError):
            logger.debug(f"creating new session: {args}, {kwargs}")
            return cls(provider_id, **kwargs)

    @property
    def cookies(self):
        """Cookie jar of the underlying requests session."""
        return self.session.cookies

    @cookies.setter
    def cookies(self, value):
        self.session.cookies = value

    @classmethod
    def destroy(cls):
        """Delete the cookie and session files of this session type."""
        # COOKIES_FILE / SESSION_FILE are instance properties; on the class
        # they evaluate to property objects, so use the classmethods.
        for path in (cls._COOKIES_FILE(), cls._SESSION_FILE()):
            if os.path.exists(path):
                os.remove(path)

    @classmethod
    def load(cls, provider_id, **kwargs):
        """Create a session from the saved state file.

        Values from the state file override same-named `kwargs`.

        :raises FileNotFoundError: if no session file has been saved
        """
        with open(cls._SESSION_FILE()) as infile:
            state = yaml.load(infile, Loader=AttrDictYAMLLoader)
        logger.trace(f"load: {cls.__name__}, {state}")
        return cls(provider_id, **dict(kwargs, **state))

    def save(self):
        """Write the state to the session file and cookies to the jar file."""
        logger.trace(f"save: {self.__class__.__name__}, {self._state}")
        with open(self.SESSION_FILE, 'w') as outfile:
            yaml.dump(self._state, outfile, default_flow_style=False)
        self.cookies.save(self.COOKIES_FILE)

    def get_cookie(self, name):
        """Return the value of cookie `name`, or None if it is not set."""
        return requests.utils.dict_from_cookiejar(self.cookies).get(name)

    def __getattr__(self, attr):
        """Expose the HTTP verbs of the session, routed through request()."""
        if attr in [
                "delete", "get", "head", "options", "post", "put", "patch"
        ]:
            session_method = getattr(self.session, attr)
            return functools.partial(self.request, session_method)
        # NOTE(review): any other missing attribute silently evaluates to
        # None because the raise below is disabled; confirm this is wanted.
        # raise AttributeError(attr)

    @db_session
    def request(self, method, url, *args, **kwargs):
        """Call `method(url, ...)`, serving from the response cache if enabled.

        Caching is active inside a `cache_responses()` block unless the
        instance has a truthy `no_cache` attribute.  A cached response is
        used while it is younger than the configured duration in seconds;
        otherwise the request is made and the cache entry is (re)written.
        """
        response = None
        entry = None
        use_cache = (not getattr(self, "no_cache", False)
                     and self._cache_responses)
        if use_cache:
            logger.debug("getting cached response for %s" % (url))
            entry = model.CacheEntry.get(url=url)
            if entry:
                # total_seconds(): .seconds alone would ignore whole days.
                age = datetime.now() - entry.last_seen
                if age.total_seconds() >= self._cache_responses:
                    logger.debug("cache expired for %s" % (url))
                else:
                    # NOTE(review): unpickling trusts the cache contents.
                    response = pickle.loads(entry.response)
                    logger.debug("using cached response for %s" % (url))
            else:
                logger.debug("no cached response for %s" % (url))
        if not response:
            response = method(url, *args, **kwargs)
            if use_cache:
                pickled_response = pickle.dumps(response)
                if entry:
                    # Refresh the stale entry so it does not stay expired.
                    entry.response = pickled_response
                    entry.last_seen = datetime.now()
                else:
                    model.CacheEntry(url=url,
                                     response=pickled_response,
                                     last_seen=datetime.now())
        return response

    @property
    def headers(self):
        return []

    @property
    def proxies(self):
        """Proxy mapping stored in the session state."""
        return self._state.proxies

    @proxies.setter
    def proxies(self, value):
        # Override proxy environment variables if proxies are defined on session
        if value is None:
            self.session.proxies = {}
        else:
            self.session.trust_env = (len(value) == 0)
            self._state.proxies = value
            self.session.proxies.update(value)

    @contextmanager
    def cache_responses(self, duration=model.CACHE_DURATION_DEFAULT):
        """Context manager enabling response caching for `duration`."""
        previous = self._cache_responses
        self._cache_responses = duration
        try:
            yield
        finally:
            # Restore the enclosing setting so nested blocks work.
            self._cache_responses = previous

    def cache_responses_short(self):
        return self.cache_responses(model.CACHE_DURATION_SHORT)

    def cache_responses_medium(self):
        return self.cache_responses(model.CACHE_DURATION_MEDIUM)

    def cache_responses_long(self):
        return self.cache_responses(model.CACHE_DURATION_LONG)
class StreamSession(object):
    """
    Top-level stream session interface

    Individual stream providers can be implemented by inheriting from this class
    and implementing methods for login flow, getting streams, etc.

    Credentials and proxy settings are saved to a per-class YAML session
    file, cookies to a per-class LWP cookie file, and HTTP responses can
    be cached in the sqlite database at ``CACHE_FILE``.
    """

    HEADERS = {"User-agent": USER_AGENT}

    def __init__(self,
                 username,
                 password,
                 proxies=None,
                 no_cache=False,
                 *args,
                 **kwargs):
        """Set up the HTTP session, cookie jar and response cache, then log in.

        :param username: provider account name
        :param password: provider account password
        :param proxies: optional proxy mapping applied to the session
        :param no_cache: disable the response cache entirely when true
        Extra positional and keyword arguments are accepted and ignored.
        """
        self.session = requests.Session()
        self.cookies = LWPCookieJar()
        # Make sure the cookie file exists so that load() cannot fail.
        if not os.path.exists(self.COOKIES_FILE):
            self.cookies.save(self.COOKIES_FILE)
        self.cookies.load(self.COOKIES_FILE, ignore_discard=True)
        # Copy so that per-session header changes never leak into the
        # class-level HEADERS dict shared by all sessions.
        self.session.headers = dict(self.HEADERS)
        self._state = AttrDict([("username", username),
                                ("password", password),
                                ("proxies", proxies)])
        # Actually apply the proxies to the requests session; storing them
        # in the state alone has no effect on outgoing requests.
        if proxies:
            self.proxies = proxies
        self.no_cache = no_cache
        self._cache_responses = False
        if not os.path.exists(CACHE_FILE):
            self.cache_setup(CACHE_FILE)
        self.conn = sqlite3.connect(CACHE_FILE,
                                    detect_types=sqlite3.PARSE_DECLTYPES)
        self.cursor = self.conn.cursor()
        self.cache_purge()
        # login() is expected to be provided by the subclass.
        self.login()

    @classmethod
    def session_type(cls):
        """Lower-cased class name with "StreamSession" removed."""
        return cls.__name__.replace("StreamSession", "").lower()

    @classmethod
    def _COOKIES_FILE(cls):
        return os.path.join(config.CONFIG_DIR,
                            f"{cls.session_type()}.cookies")

    @property
    def COOKIES_FILE(self):
        """Path of the cookie file for this session type."""
        return self._COOKIES_FILE()

    @classmethod
    def _SESSION_FILE(cls):
        return os.path.join(config.CONFIG_DIR,
                            f"{cls.session_type()}.session")

    @property
    def SESSION_FILE(self):
        """Path of the YAML state file for this session type."""
        return self._SESSION_FILE()

    @classmethod
    def new(cls, **kwargs):
        """Load a saved session, or create one from the configured provider."""
        try:
            return cls.load(**kwargs)
        except FileNotFoundError:
            logger.trace(f"creating new session: {kwargs}")
            provider = config.settings.profile.providers.get(
                cls.session_type())
            return cls(username=provider.username,
                       password=provider.password,
                       **kwargs)

    @property
    def cookies(self):
        """Cookie jar of the underlying requests session."""
        return self.session.cookies

    @cookies.setter
    def cookies(self, value):
        self.session.cookies = value

    @classmethod
    def destroy(cls):
        """Delete the cookie and session files of this session type."""
        # COOKIES_FILE / SESSION_FILE are instance properties; on the class
        # they evaluate to property objects, so use the classmethods.
        for path in (cls._COOKIES_FILE(), cls._SESSION_FILE()):
            if os.path.exists(path):
                os.remove(path)

    @classmethod
    def load(cls, *args, **kwargs):
        """Create a session from the saved state file.

        Keyword arguments (e.g. ``no_cache``) are passed on to the
        constructor; values from the state file take precedence.

        :raises FileNotFoundError: if no session file has been saved
        """
        with open(cls._SESSION_FILE()) as infile:
            state = yaml.load(infile, Loader=AttrDictYAMLLoader)
        logger.trace(f"load: {cls.__name__}, {state}")
        return cls(**dict(kwargs, **state))

    def save(self):
        """Write the state to the session file and cookies to the jar file."""
        logger.trace(f"save: {self.__class__.__name__}, {self._state}")
        with open(self.SESSION_FILE, 'w') as outfile:
            yaml.dump(self._state, outfile, default_flow_style=False)
        self.cookies.save(self.COOKIES_FILE)

    def get_cookie(self, name):
        """Return the value of cookie `name`, or None if it is not set."""
        return requests.utils.dict_from_cookiejar(self.cookies).get(name)

    def __getattr__(self, attr):
        """Expose the HTTP verbs of the session, routed through request()."""
        if attr in [
                "delete", "get", "head", "options", "post", "put", "patch"
        ]:
            session_method = getattr(self.session, attr)
            return functools.partial(self.request, session_method)
        raise AttributeError(attr)

    def request(self, method, url, *args, **kwargs):
        """Call `method(url, ...)`, serving from the response cache if enabled.

        Caching is active inside a `cache_responses()` block unless
        `no_cache` is set.  A cached response is used while it is younger
        than the configured duration in seconds; otherwise the request is
        made and the result is written to the cache.
        """
        response = None
        use_cache = not self.no_cache and self._cache_responses
        if use_cache:
            logger.debug("getting cached response for %s" % (url))
            self.cursor.execute(
                "SELECT response, last_seen "
                "FROM response_cache "
                "WHERE url = ?", (url, ))
            try:
                # fetchone() gives None when there is no row, which makes
                # the unpacking raise TypeError.
                (pickled_response, last_seen) = self.cursor.fetchone()
                # total_seconds(): .seconds alone would ignore whole days.
                td = datetime.now() - last_seen
                if td.total_seconds() >= self._cache_responses:
                    logger.debug("cache expired for %s" % (url))
                else:
                    # NOTE(review): unpickling trusts the cache contents.
                    response = pickle.loads(pickled_response)
                    logger.debug("using cached response for %s" % (url))
            except TypeError:
                logger.debug("no cached response for %s" % (url))
        if not response:
            response = method(url, *args, **kwargs)
            # Only freshly fetched responses are stored; re-storing a cache
            # hit would reset last_seen and keep the entry from expiring.
            if use_cache:
                pickled_response = pickle.dumps(response)
                sql = """INSERT OR REPLACE
                INTO response_cache (url, response, last_seen)
                VALUES (?, ?, ?)"""
                self.cursor.execute(sql,
                                    (url, pickled_response, datetime.now()))
                self.conn.commit()
        return response

    @property
    def username(self):
        return self._state.username

    @property
    def password(self):
        return self._state.password

    @property
    def headers(self):
        return []

    @property
    def proxies(self):
        """Proxy mapping stored in the session state."""
        return self._state.proxies

    @proxies.setter
    def proxies(self, value):
        # Override proxy environment variables if proxies are defined on session
        if value is not None:
            self.session.trust_env = (len(value) == 0)
            self._state.proxies = value
            self.session.proxies.update(value)

    @contextmanager
    def cache_responses(self, duration=CACHE_DURATION_DEFAULT):
        """Context manager enabling response caching for `duration`."""
        self._cache_responses = duration
        try:
            yield
        finally:
            self._cache_responses = False

    def cache_responses_short(self):
        return self.cache_responses(CACHE_DURATION_SHORT)

    def cache_responses_medium(self):
        return self.cache_responses(CACHE_DURATION_MEDIUM)

    def cache_responses_long(self):
        return self.cache_responses(CACHE_DURATION_LONG)

    def cache_setup(self, dbfile):
        """Create the response cache table in a new database file."""
        conn = sqlite3.connect(dbfile)
        try:
            c = conn.cursor()
            c.execute('''
            CREATE TABLE response_cache
            (url TEXT,
            response TEXT,
            last_seen TIMESTAMP DEFAULT (datetime('now','localtime')),
            PRIMARY KEY (url))''')
            conn.commit()
            c.close()
        finally:
            # Do not keep a second connection to the cache file open.
            conn.close()

    def cache_purge(self, days=CACHE_DURATION_LONG):
        """Delete cache rows whose last_seen is more than `days` days old.

        NOTE(review): the default is one of the CACHE_DURATION_* values that
        request() compares against seconds; confirm the intended unit.
        """
        self.cursor.execute(
            "DELETE "
            "FROM response_cache "
            "WHERE last_seen < datetime('now', ?)", ("-%d days" % (days), ))
        # Commit so the delete is not left in an open transaction.
        self.conn.commit()