def test_session_load(self):
    """Restore the session saved by the previous test and make a request.

    Verifies that the restored session is still authenticated.
    """
    with LoggedInSession.get_session() as restored:
        response = restored.s.get(links['home'])
        self.assertEqual(restored.check_login(response.content), True)
def test_home(self):
    """Every known link must be reachable (HTTP 200) with a logged-in session."""
    session = LoggedInSession.get_session()
    # Iterate the URLs directly instead of looping over keys() and
    # indexing back into the dict (same behavior, idiomatic form).
    for url in links.values():
        self.assertEqual(session.session.get(url).status_code, 200)
def test_refresh_session(self):
    """Test the refresh of expiring captchas / session ids.

    The implemented 'get' should refresh both by itself using the
    check_login method:
    - load the previously stored session
    - generate a fresh session, which expires the first one server side
    - check which sessions still have access to the overview page
    """
    stale = LoggedInSession.load_session()
    fresh = LoggedInSession.get_session(new_session=True)

    # The old session got deleted server side, so a raw request fails.
    resp = stale.s.get(links['home'])
    self.assertEqual(stale.check_login(resp.content, build=False), False)

    # The newly generated session works.
    resp = fresh.s.get(links['home'])
    self.assertEqual(fresh.check_login(resp.content, build=False), True)

    # The wrapped get() transparently refreshes the stale session.
    resp = stale.get(links['home'])
    self.assertEqual(stale.check_login(resp.content, build=False), True)
def test_session_save(self):
    """Test that credential login and session persistence work.

    - delete any previously saved session file
    - generate a new session and save it
    - check that the session file exists afterwards
    """
    path = Path(BASE_DIR, session_file)
    # Guard the delete: if no session file is left over from a previous
    # run, the unconditional os.remove() would raise FileNotFoundError
    # before the actual assertion is ever reached.
    if os.path.isfile(path):
        os.remove(path)
    session = LoggedInSession.get_session()
    session.save_session()
    self.assertEqual(os.path.isfile(path), True)
def test_check_login(self):
    """Check whether we are on the start / captcha page.

    Don't use live data, for consistency: load a locally stored copy
    of the logged-in overview page and run check_login against it.
    """
    session = LoggedInSession.get_session()
    # Load the local fixture of the logged-in overview page.
    with open(Path(RES_DIR, 'home/home_account.html'), 'r') as f:
        overview_html = f.read()
    # BUG FIX: the original passed the URL links['home'] to check_login,
    # so the fixture HTML read above was never used — contradicting the
    # docstring's "don't take live data" intent.
    self.assertEqual(
        session.check_login(overview_html, build=False),
        True
    )
# Query parameters for the news/ajax endpoint; each numeric key toggles a
# news category on ('true') or off ('false').
params = {
    'action': 'ajax',
    '1': 'false',
    '2': 'false',
    '3': 'false',
    '4': 'false',
    '5': 'true',  # Einheitenmarkt (unit market)
    '6': 'false',
    '7': 'false',
    '8': 'false',
    '9': 'false',
    '10': 'false',
}

if __name__ == '__main__':
    session = LoggedInSession.get_session()
    r = session.get(links['news'], params=params)

    # BUG FIX: on Python 3, map() returns a one-shot iterator. The original
    # code exhausted it in the normalization loop below, so pd.DataFrame()
    # received an empty iterable. Materialize into a list first.
    sales = [
        list(match)
        for match in sales_regex.findall(r.content)
        # + tender_regex.findall(r.content)
    ]
    for sale in sales:
        # Normalize the numeric columns (amount at index 0, price at 2).
        sale[0] = string_to_int(sale[0])
        sale[2] = string_to_int(sale[2])

    # BUG FIX: set the display option *before* printing; the original set
    # it after print(), so it never affected the output.
    pd.set_option('display.max_rows', 1000)

    df = pd.DataFrame(sales)
    grouped = df.groupby(1).sum()
    print(grouped)
from unipath import Path

from api import LoggedInSession
from scraper.rankings import syndicate_link
from syn_utils import overview_link, RES_DIR


def update_captcha(session):
    """Save the session's current captcha image into the resource directory."""
    session.img.save(Path(RES_DIR, 'captcha/new_captcha.png'))


def update_syndicates(session):
    """Download syndicate page 11 and store it as a local test fixture."""
    content = session.s.get(syndicate_link(11)).content
    # BUG FIX (Python 3): requests' .content is bytes, so the file must be
    # opened in binary mode; writing bytes to a 'w' (text) handle raises
    # TypeError. NOTE(review): if this project still runs on Python 2,
    # 'wb' is equally valid there.
    with open(Path(RES_DIR, 'syndicate/new_syndicate.html'), 'wb') as f:
        f.write(content)


def update_home(session):
    """Download the account overview page and store it as a local fixture."""
    content = session.s.get(overview_link).content
    # Binary mode for the same reason as update_syndicates().
    with open(Path(RES_DIR, 'home/home_account.html'), 'wb') as f:
        f.write(content)


if __name__ == '__main__':
    with LoggedInSession.get_session() as session:
        # update_captcha(session)
        # update_syndicates(session)
        update_home(session)