def main():
    """Log in to pge.com and download every invoice in the payment history.

    Posts the login form, opens the billing/payment history page, collects
    one ``{'filename': ..., 'url': ...}`` entry per table row that offers a
    "Download" link, and passes the list to ``download_invoices``.
    """
    login_form = {
        'USER': username,
        'PASSWORD': password,
        'REALMOID': '06-40da262c-b9d7-00ef-0000-28ff000028ff',
        'TARGET': 'https://www.pge.com/myenergyweb/appmanager/pge/customer',
        'SMAUTHREASON': 0,
        'FCC': 'DEFAULT',
        'PROTOCOL': 'DEFAULT'
    }
    pge = GetResponseSoup('https://www.pge.com/eum/login', login_form)

    # Get to the statements page
    pge.update('https://www.pge.com/myenergyweb/appmanager/pge/customer?_nfpb=true&_pageLabel=BillingPaymentHistory&_nfls=false')

    history_table = pge.soup.find('table', {'id': 'transaction-history-table'})
    date_pattern = re.compile(r'\d{2}/\d{2}/\d{2}')

    invoices = []
    for tr in history_table.findAll('tr'):
        # Only rows that carry a "Download" link have a PDF to fetch.
        if not tr.find('a', {'class': 'download-pdf-lft'}, text='Download'):
            continue
        invoice_name = format_datestring(tr.find('span', text=date_pattern))
        invoice_url = tr.find('a', {'class': 'download-pdf-lft'})['href']
        invoices.append(dict(filename=invoice_name, url=invoice_url))

    pge.download_invoices(invoices, SAVE_PATH)
def main():
    """Log in at ``login_url`` and download bills for every account.

    Flow:
      1. Load the login page and let ``extract_fields`` fill ``fields`` from
         the page for each name in ``field_names``, then post the login form.
      2. Open ``billhistory_url`` and call ``prepare_and_download`` for the
         account that is shown first.
      3. Read the remaining account numbers from the account selector and,
         for each one, post the selector form and download again.

    ``extract_fields`` and ``prepare_and_download`` are defined elsewhere in
    this file; presumably they copy hidden form inputs into ``fields`` and
    save the bills of the current page, respectively -- confirm there.
    """
    field_names = [
        "__EVENTTARGET",
        "__VIEWSTATE",
        "__EVENTARGUMENT",
        "__VIEWSTATEGENERATOR",
        "__EVENTVALIDATION",
        "__LASTFOCUS",
    ]
    fields = {
        "ctl00$StaticContentWebView1$staticContentLocation": "StaticContent/NWEHeaderIndex.htm",
        "ctl00$body_content$txtUsername": username,
        "ctl00$body_content$txtPassword": password,
        "ctl00$StaticContentWebView2$staticContentLocation": "StaticContent/NWEFooterIndex.htm",
        "ctl00$body_content$btnLogin.x": "25",
        "ctl00$body_content$btnLogin.y": "11",
    }

    nwe = GetResponseSoup(login_url)

    # Extract the field names from the site
    extract_fields(nwe, fields, field_names)

    # Logging in
    nwe.update(login_url, fields)

    # Go to Payments page where the invoices are
    nwe.update(billhistory_url)
    prepare_and_download(nwe)

    # Get all the account numbers on my account
    nwe.update(billhistory_url)
    account_selector = nwe.soup.find(
        'select',
        attrs={'name': 'ctl00$AccountSummaryHeaderControl1$headerAccountSelector'})
    # NOTE(review): '3168266-9' is skipped; presumably it is the default
    # account already downloaded above -- confirm.
    account_numbers = [
        option['value']
        for option in account_selector.findAll('option')
        if option['value'] != '3168266-9'
    ]

    # The per-account form carries two extra inputs read from the page.
    field_names.extend([
        "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayAccountNumber",
        "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayState",
    ])

    for account_number in account_numbers:
        if nwe.url != billhistory_url:
            nwe.update(billhistory_url)
        fields = {
            "ctl00$StaticContentWebView1$staticContentLocation": "StaticContent/NWEHeader.htm",
            "TC08BCDB1053_ctl00_ctl00_siteMapControl_customnavigation_ClientState": "",
            "ctl00$AccountSummaryHeaderControl1$headerAccountSelector": account_number,
            # Fixed: the key used to end in ":https" with the value starting
            # at "//", i.e. the pair had been split on the wrong colon.
            "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayURL": "https://paynow7.speedpay.com/northwestern/index.asp",
            "ctl00$body_content$StaticContentWebView1$staticContentLocation": "StaticContent/BillViewRequirements.htm",
            "ctl00$StaticContentWebView2$staticContentLocation": "StaticContent/NWEFooter.htm"
        }
        # Extract the field names from the site
        extract_fields(nwe, fields, field_names)
        nwe.update(billhistory_url, fields)
        # Download while this account is the selected one.
        prepare_and_download(nwe)
def main():
    """Log in at ``login_url`` and download the bills listed on the home page.

    Loads the login page, copies the hidden inputs named in ``field_names``
    into the login form, posts it, opens ``Home.aspx`` and builds one
    ``{'filename': ..., 'url': ...}`` entry per billing-history row before
    handing the list to ``download_invoices``.
    """
    # Hidden inputs whose current values must be read from the login page.
    # (Each name is listed once; a repeated entry only repeats the lookup.)
    field_names = [
        r'__EVENTARGUMENT',
        r'__VIEWSTATE',
        r'__EVENTVALIDATION',
        r'__VIEWSTATEGENERATOR',
        r'__VIEWSTATEENCRYPTED',
    ]
    fields = {
        r'dnn$ctr487$Login$Login_DNN$txtUsername': username,
        r'dnn$ctr487$Login$Login_DNN$txtPassword': password,
        r'__ASYNCPOST': 'true',
        r'ScrollTop': '301',
        r'ScriptManager_TSM': ';;System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35:en:f319b152-218f-4c14-829d-050a68bb1a61:ea597d4b:b25378d2;Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en:3fe22950-1961-4f26-b9d4-df0df7356bf6:16e4e7cd:f7645509:ed16cbdc',
        r'StylesheetManager_TSSM': ';Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en-US:3fe22950-1961-4f26-b9d4-df0df7356bf6:45085116:27c5704c',
        r'ScriptManager': r'dnn$ctr487$dnn$ctr487$Login_UPPanel|dnn$ctr487$Login$Login_DNN$cmdLogin',
        r'__EVENTTARGET': 'dnn$ctr487$Login$Login_DNN$cmdLogin',
        # NOTE(review): this value is a dict, not a string; how it is
        # serialized depends on GetResponseSoup -- confirm it is what the
        # site expects.
        r'__dnnVariable': {
            "__scdoff": "1",
            "containerid_dnn_ctr395_ModuleContent": "395",
            "cookieid_dnn_ctr395_ModuleContent": "_Module395_Visible",
            "min_icon_395": "/Portals/_default/Containers/eCAReDefault/ar_closemenu.gif",
            "max_icon_395": "/Portals/_default/Containers/eCAReDefault/ar_openmenu.gif",
            "max_text": "Maximize",
            "min_text": "Minimize",
        },
        r'RadAJAXControlID': r'dnn_ctr487_Login_UP',
    }

    # Login page (but not logging in)
    mw = GetResponseSoup(login_url)

    # Extract the field names from the site; absent inputs are sent empty.
    for field in field_names:
        find_field = mw.soup.find(id=field)
        fields[field] = find_field['value'] if find_field else ''

    # Actually log in
    mw.update(login_url, fields)
    mw.update('http://myaccount.mtnwater.com/Home.aspx')

    transaction_table = mw.soup.find('table', {'id': 'dnn_ctr479_BillingHistory_GridView1'})
    # Header rows contain <th> cells and carry no invoice.
    transaction_rows = [row for row in transaction_table.findAll('tr') if not row.find('th')]

    # Compile once instead of once per row.
    date_href = re.compile(r'^javascript')
    pdf_href = re.compile(r'onlinebiller')

    downloadable_transactions = []
    for row in transaction_rows:
        date_link = row.find('a', attrs={'href': date_href})
        pdf_link = row.find('a', {'href': pdf_href})
        # Skip rows without both links (e.g. pager or empty-data rows)
        # instead of failing on None.
        if date_link is None or pdf_link is None:
            continue
        downloadable_transactions.append(dict(
            filename=format_datestring(date_link.text),
            url=pdf_link['href'],
        ))

    mw.download_invoices(downloadable_transactions, SAVE_PATH)
def main():
    """Log in at ``login_url`` and download the bills listed on the home page.

    Steps: fetch the login page, copy the hidden inputs named in
    ``field_names`` into the login form, post the form, open ``Home.aspx``,
    turn each billing-history row into a ``{'filename', 'url'}`` dict and
    pass the resulting list to ``download_invoices``.
    """
    # Hidden inputs to copy from the login page. Each name appears once;
    # listing a name twice would only repeat the same lookup.
    field_names = [
        r'__EVENTARGUMENT',
        r'__VIEWSTATE',
        r'__EVENTVALIDATION',
        r'__VIEWSTATEGENERATOR',
        r'__VIEWSTATEENCRYPTED',
    ]
    fields = {
        r'dnn$ctr487$Login$Login_DNN$txtUsername': username,
        r'dnn$ctr487$Login$Login_DNN$txtPassword': password,
        r'__ASYNCPOST': 'true',
        r'ScrollTop': '301',
        r'ScriptManager_TSM': ';;System.Web.Extensions, Version=4.0.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35:en:f319b152-218f-4c14-829d-050a68bb1a61:ea597d4b:b25378d2;Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en:3fe22950-1961-4f26-b9d4-df0df7356bf6:16e4e7cd:f7645509:ed16cbdc',
        r'StylesheetManager_TSSM': ';Telerik.Web.UI, Version=2012.2.724.35, Culture=neutral, PublicKeyToken=121fae78165ba3d4:en-US:3fe22950-1961-4f26-b9d4-df0df7356bf6:45085116:27c5704c',
        r'ScriptManager': r'dnn$ctr487$dnn$ctr487$Login_UPPanel|dnn$ctr487$Login$Login_DNN$cmdLogin',
        r'__EVENTTARGET': 'dnn$ctr487$Login$Login_DNN$cmdLogin',
        # NOTE(review): a dict is passed as a form value here; its wire
        # format depends on GetResponseSoup -- verify against the site.
        r'__dnnVariable': {
            "__scdoff": "1",
            "containerid_dnn_ctr395_ModuleContent": "395",
            "cookieid_dnn_ctr395_ModuleContent": "_Module395_Visible",
            "min_icon_395": "/Portals/_default/Containers/eCAReDefault/ar_closemenu.gif",
            "max_icon_395": "/Portals/_default/Containers/eCAReDefault/ar_openmenu.gif",
            "max_text": "Maximize",
            "min_text": "Minimize"
        },
        r'RadAJAXControlID': r'dnn_ctr487_Login_UP',
    }

    # Login page (but not logging in)
    mw = GetResponseSoup(login_url)

    # Extract the field names from the site; missing inputs become ''.
    for field in field_names:
        find_field = mw.soup.find(id=field)
        fields[field] = find_field['value'] if find_field else ''

    # Actually log in
    mw.update(login_url, fields)
    mw.update('http://myaccount.mtnwater.com/Home.aspx')

    transaction_table = mw.soup.find(
        'table', {'id': 'dnn_ctr479_BillingHistory_GridView1'})
    # Rows holding <th> cells are headers, not transactions.
    transaction_rows = [
        row for row in transaction_table.findAll('tr') if not row.find('th')
    ]

    # Patterns are loop-invariant, so build them once.
    date_href = re.compile(r'^javascript')
    pdf_href = re.compile(r'onlinebiller')

    downloadable_transactions = []
    for row in transaction_rows:
        date_link = row.find('a', attrs={'href': date_href})
        pdf_link = row.find('a', {'href': pdf_href})
        if date_link is None or pdf_link is None:
            # Not an invoice row (e.g. pager / empty-data row): nothing to
            # download, and dereferencing None would abort the whole run.
            continue
        downloadable_transactions.append(
            dict(filename=format_datestring(date_link.text),
                 url=pdf_link['href']))

    mw.download_invoices(downloadable_transactions, SAVE_PATH)
def main():
    """Log in at ``login_url`` and download bills for each account found.

    The login form is completed by ``extract_fields`` (defined elsewhere;
    presumably it copies the named hidden inputs from the current page into
    ``fields`` -- confirm) and posted. The bill-history page is then
    processed once as first shown, and once more per account number read
    from the account selector, calling ``prepare_and_download`` each time.
    """
    field_names = [
        "__EVENTTARGET",
        "__VIEWSTATE",
        "__EVENTARGUMENT",
        "__VIEWSTATEGENERATOR",
        "__EVENTVALIDATION",
        "__LASTFOCUS",
    ]
    fields = {
        "ctl00$StaticContentWebView1$staticContentLocation":
        "StaticContent/NWEHeaderIndex.htm",
        "ctl00$body_content$txtUsername": username,
        "ctl00$body_content$txtPassword": password,
        "ctl00$StaticContentWebView2$staticContentLocation":
        "StaticContent/NWEFooterIndex.htm",
        "ctl00$body_content$btnLogin.x": "25",
        "ctl00$body_content$btnLogin.y": "11",
    }

    nwe = GetResponseSoup(login_url)

    # Extract the field names from the site
    extract_fields(nwe, fields, field_names)

    # Logging in
    nwe.update(login_url, fields)

    # Go to Payments page where the invoices are
    nwe.update(billhistory_url)
    prepare_and_download(nwe)

    # Get all the account numbers on my account
    nwe.update(billhistory_url)
    selector = nwe.soup.find(
        'select',
        attrs={
            'name': 'ctl00$AccountSummaryHeaderControl1$headerAccountSelector'
        })
    # NOTE(review): '3168266-9' is excluded; presumably it is the account
    # already handled by the download above -- confirm.
    account_numbers = [
        o['value'] for o in selector.findAll('option')
        if o['value'] != '3168266-9'
    ]

    # Two more inputs are read from the page for the per-account post.
    field_names.extend([
        "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayAccountNumber",
        "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayState",
    ])

    for account_number in account_numbers:
        if nwe.url != billhistory_url:
            nwe.update(billhistory_url)
        fields = {
            "ctl00$StaticContentWebView1$staticContentLocation":
            "StaticContent/NWEHeader.htm",
            "TC08BCDB1053_ctl00_ctl00_siteMapControl_customnavigation_ClientState":
            "",
            "ctl00$AccountSummaryHeaderControl1$headerAccountSelector":
            account_number,
            # Fixed: the original key ended in ":https" and the value began
            # with "//" -- the "name:value" pair was split on the colon
            # inside the URL scheme instead of the one after the name.
            "ctl00$body_content$AccountSummaryDynamicLinksView1$hdSpeedPayURL":
            "https://paynow7.speedpay.com/northwestern/index.asp",
            "ctl00$body_content$StaticContentWebView1$staticContentLocation":
            "StaticContent/BillViewRequirements.htm",
            "ctl00$StaticContentWebView2$staticContentLocation":
            "StaticContent/NWEFooter.htm"
        }
        # Extract the field names from the site
        extract_fields(nwe, fields, field_names)
        nwe.update(billhistory_url, fields)
        # Download while this account is the one selected on the page.
        prepare_and_download(nwe)
def main():
    """Log in via ``login_url`` and download every invoice PDF listed.

    Flow:
      1. Post the login form, then open the single-sign-on page and read
         the ``authTkn`` / ``keyTkn`` inputs from it.
      2. Post those tokens to the billerweb ``inetSrv`` endpoint, which
         returns the page listing the invoices.
      3. For each ``viewDocument`` link, post a ViewDocument request, read
         the query string of the returned ``<frame>`` and build the
         ``document.pdf`` URL from it.
      4. Pass the ``{'filename', 'id', 'url'}`` dicts to ``download_invoices``.
    """
    fields = {
        r'username': username,
        r'password': password,
        r'login-form-type': r'pwd',
    }
    rg = GetResponseSoup(login_url, fields)
    rg.update(
        'https://secure.republiconline.com/secureeportal/_layouts/fissinglesignon.aspx'
    )

    authTkn = rg.soup.find('input', {'name': 'authTkn'})['value']
    keyTkn = rg.soup.find('input', {'name': 'keyTkn'})['value']
    fields = {
        r'slchannel': 'ALWRSA',
        r'client': '701122300',
        r'unitCode': 'ALW',
        r'type': 'ApplicationMenu',
        r'enc': 'web',
        r'authTkn': authTkn,
        r'keyTkn': keyTkn
    }
    rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

    # At this point, we're at the page where the invoices are listed.
    # Let's find the invoice IDs

    # Compiled once here rather than on every invoice_info() call.
    doc_id_search = re.compile(r'viewDocument\(\'(\d+)\'')

    def clean_due_date(s):
        """Turn 'Due: MM/DD/YYYY' into 'YYYY-MM-DD'."""
        cleaned_up = s.strip().replace('/', '-').replace('Due: ', '')
        # now looks like MM-DD-YYYY
        month, day, year = cleaned_up.split('-')
        return "{0}-{1}-{2}".format(year, month, day)

    def invoice_info(link):
        """Return the filename and document id for one invoice link."""
        find_id = doc_id_search.search(link['href'])
        doc_name = "{} {}".format(
            clean_due_date(link.contents[-1]),
            link.contents[0].strip().replace(': ', '-'))
        return dict(filename=doc_name, id=find_id.group(1))

    invoice_links = rg.soup.findAll('a', href=re.compile('viewDocument'))
    # A real list, not map(): it is iterated and mutated in the loop below
    # and then iterated again by download_invoices. A lazy map object would
    # be exhausted after the first pass.
    doc_ids = [invoice_info(link) for link in invoice_links]

    sessionHandle = rg.soup.find('input', {'name': 'sessionHandle'})['value']
    client = rg.soup.find('input', {'name': 'client'})['value']

    pdf_endpoint = urlparse(
        'https://secure3.billerweb.com/alw/inetSrv/document.pdf')

    for invoice in doc_ids:
        fields = {
            r'sessionHandle': sessionHandle,
            r'type': 'UserService',
            r'action': 'ViewDocument',
            r'client': client,
            r'logoutUrl': '',
            r'operation': "search",
            r'startPos': '0',
            r'newBean': '',
            r'mode': '',
            r'documentId': invoice['id'],
            r'accountServiceId': '4',
            r'documentSetId': '',
            r'invoiceNumber': '',
            r'selectedAccount': '',
            r'payDocumentId': '',
            # This key used to appear twice ('' and then this value); only
            # the last occurrence ever took effect, so keep just that one.
            r'accountSelection': 'allActiveAccounts',
        }
        rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

        # The viewer page embeds the document in a frame; its query string
        # carries the values needed for the PDF request.
        src = rg.soup.find('frame')['src']
        parsed_query = parse_qs(urlparse(src).query)

        fields = {
            'action': 'ShowPdf',
            'type': parsed_query['type'][0],
            'client': parsed_query['client'][0],
            'sessionHandle': parsed_query['sessionHandle'][0],
            'hasToc': 'false',
            'beginPage': '1',
            'endPage': '2',
            'docId': invoice['id']
        }
        # Same URL as splicing the urlparse() tuple by hand: only the query
        # component of the endpoint is replaced.
        invoice['url'] = pdf_endpoint._replace(
            query=urllib.urlencode(fields)).geturl()

    rg.download_invoices(doc_ids, SAVE_PATH)
def main():
    """Log in via ``login_url`` and download every invoice PDF listed.

    After logging in, the single-sign-on page supplies ``authTkn`` and
    ``keyTkn``, which are posted to the billerweb ``inetSrv`` endpoint to
    reach the invoice list. Each ``viewDocument`` link on that page yields a
    document id; a ViewDocument post per id returns a framed viewer whose
    query string is used to build the ``document.pdf`` URL. The resulting
    ``{'filename', 'id', 'url'}`` dicts go to ``download_invoices``.
    """
    fields = {
        r'username': username,
        r'password': password,
        r'login-form-type': r'pwd',
    }
    rg = GetResponseSoup(login_url, fields)
    rg.update('https://secure.republiconline.com/secureeportal/_layouts/fissinglesignon.aspx')

    authTkn = rg.soup.find('input', {'name': 'authTkn'})['value']
    keyTkn = rg.soup.find('input', {'name': 'keyTkn'})['value']
    fields = {
        r'slchannel': 'ALWRSA',
        r'client': '701122300',
        r'unitCode': 'ALW',
        r'type': 'ApplicationMenu',
        r'enc': 'web',
        r'authTkn': authTkn,
        r'keyTkn': keyTkn
    }
    rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

    # At this point, we're at the page where the invoices are listed.
    # Let's find the invoice IDs

    # Loop-invariant pattern: build it once, not per link.
    doc_id_search = re.compile(r'viewDocument\(\'(\d+)\'')

    def clean_due_date(s):
        """Convert 'Due: MM/DD/YYYY' to 'YYYY-MM-DD'."""
        cleaned_up = s.strip().replace('/', '-').replace('Due: ', '')
        # now looks like MM-DD-YYYY
        month, day, year = cleaned_up.split('-')
        return "{0}-{1}-{2}".format(year, month, day)

    def invoice_info(link):
        """Build the filename/id record for a single invoice link."""
        find_id = doc_id_search.search(link['href'])
        doc_name = "{} {}".format(clean_due_date(link.contents[-1]),
                                  link.contents[0].strip().replace(': ', '-'))
        return dict(
            filename=doc_name,
            id=find_id.group(1)
        )

    invoice_links = rg.soup.findAll('a', href=re.compile('viewDocument'))
    # Materialized as a list because the records are updated in the loop
    # below and then read again by download_invoices; a lazy map() iterator
    # would already be empty by then.
    doc_ids = [invoice_info(link) for link in invoice_links]

    sessionHandle = rg.soup.find('input', {'name': 'sessionHandle'})['value']
    client = rg.soup.find('input', {'name': 'client'})['value']

    pdf_endpoint = urlparse('https://secure3.billerweb.com/alw/inetSrv/document.pdf')

    for invoice in doc_ids:
        fields = {
            r'sessionHandle': sessionHandle,
            r'type': 'UserService',
            r'action': 'ViewDocument',
            r'client': client,
            r'logoutUrl': '',
            r'operation': "search",
            r'startPos': '0',
            r'newBean': '',
            r'mode': '',
            r'documentId': invoice['id'],
            r'accountServiceId': '4',
            r'documentSetId': '',
            r'invoiceNumber': '',
            r'selectedAccount': '',
            r'payDocumentId': '',
            # Previously listed twice; the earlier '' entry was always
            # overwritten by this one, so it has been dropped.
            r'accountSelection': 'allActiveAccounts',
        }
        rg.update('https://secure3.billerweb.com/alw/inetSrv', fields)

        # The returned page frames the document; reuse that frame's query
        # parameters for the PDF request.
        src = rg.soup.find('frame')['src']
        parsed_query = parse_qs(urlparse(src).query)

        fields = {
            'action': 'ShowPdf',
            'type': parsed_query['type'][0],
            'client': parsed_query['client'][0],
            'sessionHandle': parsed_query['sessionHandle'][0],
            'hasToc': 'false',
            'beginPage': '1',
            'endPage': '2',
            'docId': invoice['id']
        }
        # Replace only the query part of the endpoint URL; equivalent to
        # rebuilding the six-tuple for urlunparse() by hand.
        invoice['url'] = pdf_endpoint._replace(query=urllib.urlencode(fields)).geturl()

    rg.download_invoices(doc_ids, SAVE_PATH)