def scrap_vidbull(self, episode, video_id):
    """Scrape the vidbull.com page for *video_id* and register media for *episode*.

    Submits the two redirect forms vidbull serves before exposing the video,
    extracts the .mp4 URL, records an HttpMediaFile for the episode and points
    the episode at its external .srt subtitle file.

    :param episode: episode model instance; ``subtitles`` is set and ``save()`` called.
    :param video_id: vidbull video identifier (may carry an extension, e.g. "abc.html").
    """
    # NOTE: a leftover `import ipdb; ipdb.set_trace()` debugger breakpoint was
    # removed from the top of this function.
    url = "http://vidbull.com/%s" % video_id
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html, from_encoding='latin-1')

    # First gate: the form named "F1" must be re-submitted with its hidden fields.
    data = {}
    for elem in soup.find('form', attrs={'name': 'F1'})('input'):
        if elem.get('name'):
            data[elem.get('name')] = elem.get('value')
    html = urllib.urlopen(url, urllib.urlencode(data)).read()
    soup = BeautifulSoup(html, from_encoding='latin-1')

    # Second gate: re-submit the first form on the follow-up page the same way.
    data = {}
    for elem in soup.form('input'):
        if elem.get('name'):
            data[elem.get('name')] = elem.get('value')
    html = urllib.urlopen(url, urllib.urlencode(data)).read()

    # TODO(review): video_url is extracted but never stored anywhere —
    # presumably it should be attached to the HttpMediaFile; confirm intent.
    video_url = re.findall('a href="(.*?mp4)"', html)[0]
    media = HttpMediaFile(episode=episode)
    media.save()
    # Subtitle files are keyed by the id without its extension.
    episode.subtitles = "http://sc.miratuserie.com/episodes/subs/%s.srt" % video_id.split('.')[0]
    episode.save()
def updateScores(self, field, scoreDict):
    """Fetch the Gradesource score-entry page for assessment *field* and build
    the POST/id mappings needed to submit the scores in *scoreDict*.

    :param field: Gradesource assessment id (interpolated into the edit URL).
    :param scoreDict: mapping of student email -> score string ("" = no score).
    Prints a warning for every score above the page's stated maximum, and for
    every Gradesource account with no matching CSV entry.
    """
    s = self.s
    print("Updating scores...")
    # Fetch the score-entry page for this assessment.
    html = s.get('https://www.gradesource.com/editscores1.asp?id=%s' % field,
                 cookies=self.cookies).content

    # Pull the maximum possible score out of the page header and flag any
    # submitted score that exceeds it (e.g. an 11/10 typo in the CSV).
    returnOutput = {}
    totalCount = re.compile('<td nowrap colspan=3 class=BT> Maximum Points: <font color="#336699"><b>(.*)</b></font></td>')
    maximumScore = totalCount.search(html).group(1).strip()
    maxScore = float(maximumScore)  # hoisted: invariant across the loop
    for k, v in scoreDict.items():
        # Edge case: score was never entered — treat as -1 so it can't trip the check.
        value = -1 if v == "" else float(v)
        if value > maxScore:
            print(k + " has a score of " + v + " which is larger than the maximum score of " + maximumScore)
            returnOutput[k] = v

    # Walk the edit form: each student row is an "student*" input followed by
    # hidden inputs carrying the Gradesource number and the row id.
    nomnomsoup = BeautifulSoup(html)
    updatePOSTDict = {}
    updateIDDict = {}
    # Compile the row-scraping patterns once, not on every iteration.
    studentNumber = re.compile('input id="(.*)" name=')
    gradesourceNumber = re.compile('type="hidden" value="(.*)"')
    idNumber = re.compile('input name="id(.*)" type="hidden"')
    for x in nomnomsoup.form('input', id=re.compile("^student")):
        studStr = studentNumber.search(str(x)).group(1).strip()
        # The hidden input directly after the student field holds the number.
        x = x.findNext("input")
        gradStr = gradesourceNumber.search(str(x)).group(1).strip()
        updatePOSTDict[studStr] = gradStr
        idString = "id" + str(idNumber.search(str(x)).group(1).strip())
        updateIDDict[idString] = gradStr

    # Inner-join savedAccount (gradesourceNumber -> email) with
    # scoreDict (email -> score) to get gradesourceNumber -> score.
    joinedDictA = {}
    saveAccount = self.savedAccount
    for key in saveAccount.keys():
        try:
            joinedDictA[key] = scoreDict[saveAccount[key]]
        except KeyError:  # was a blanket `except Exception` — only the dict lookup can fail here
            print(saveAccount[key] + " was found in Gradesource but not in the CSV.")
            continue
def updateScores(self, field, scoreDict):
    """Fetch the Gradesource score-entry page for assessment *field* and build
    the POST/id mappings needed to submit the scores in *scoreDict*.

    :param field: Gradesource assessment id (interpolated into the edit URL).
    :param scoreDict: mapping of student email -> score string ("" = no score).
    Prints a warning for every score above the page's stated maximum, and for
    every Gradesource account with no matching CSV entry.
    """
    s = self.s
    print("Updating scores...")
    # Grabs the website
    html = s.get('https://www.gradesource.com/editscores1.asp?id=%s' % field,
                 cookies=self.cookies).content

    # Grabs the max score and runs a check on if any scores are over the maximum
    returnOutput = {}
    totalCount = re.compile('<td nowrap colspan=3 class=BT> Maximum Points: <font color="#336699"><b>(.*)</b></font></td>')
    maximumScore = totalCount.search(html).group(1).strip()
    maxScore = float(maximumScore)  # hoisted: invariant across the loop
    for k, v in scoreDict.items():
        # Edge case in which the score wasn't inputted — -1 can never exceed the max.
        value = -1 if v == "" else float(v)
        if value > maxScore:
            # Throw warning, in case someone has a score of 11/10; they're not recorded.
            print(k + " has a score of " + v + " which is larger than the maximum score of " + maximumScore)
            returnOutput[k] = v

    # nomnom soup magic: scrape the edit form row by row.
    nomnomsoup = BeautifulSoup(html)
    updatePOSTDict = {}
    updateIDDict = {}
    # Compile the row-scraping patterns once, not on every iteration.
    studentNumber = re.compile('input id="(.*)" name=')
    gradesourceNumber = re.compile('type="hidden" value="(.*)"')
    idNumber = re.compile('input name="id(.*)" type="hidden"')
    for x in nomnomsoup.form('input', id=re.compile("^student")):
        # Grabs the student number from the visible input.
        studStr = studentNumber.search(str(x)).group(1).strip()
        # Grabs the gradesource number from the hidden input that follows it.
        x = x.findNext("input")
        gradStr = gradesourceNumber.search(str(x)).group(1).strip()
        updatePOSTDict[studStr] = gradStr
        # Grabs the row id number from the same hidden input.
        idString = "id" + str(idNumber.search(str(x)).group(1).strip())
        updateIDDict[idString] = gradStr

    # Some inner-join magic — join savedAccount (gradesourceNumber -> email)
    # with scoreDict (email -> score) into (gradesourceNumber -> score).
    joinedDictA = {}
    saveAccount = self.savedAccount
    for key in saveAccount.keys():
        try:
            joinedDictA[key] = scoreDict[saveAccount[key]]
        except KeyError:  # was a blanket `except Exception` — only the dict lookup can fail here
            print(saveAccount[key] + " was found in Gradesource but not in the CSV.")
            continue
def updateScores(self, field, scoreDict):
    """Fetch the Gradesource score-entry page for assessment *field* and build
    the POST/id mappings needed to submit the scores in *scoreDict*.

    :param field: Gradesource assessment id (interpolated into the edit URL).
    :param scoreDict: mapping of student email -> score string ("" = no score).
    Prints a warning for every score above the page's stated maximum, and for
    every Gradesource account with no matching CSV entry.
    """
    s = self.s
    print("Updating scores...")
    # Grabs the website
    html = s.get('https://www.gradesource.com/editscores1.asp?id=%s' % field,
                 cookies=self.cookies).content

    # Grabs the max score and runs a check on if any scores are over the maximum
    returnOutput = {}
    totalCount = re.compile(
        '<td nowrap colspan=3 class=BT> Maximum Points: <font color="#336699"><b>(.*)</b></font></td>'
    )
    maximumScore = totalCount.search(html).group(1).strip()
    maxScore = float(maximumScore)  # hoisted: invariant across the loop
    for k, v in scoreDict.items():
        # Edge case in which the score wasn't inputted — -1 can never exceed the max.
        value = -1 if v == "" else float(v)
        if value > maxScore:
            # Throw warning, in case someone has a score of 11/10; they're not recorded.
            print(k + " has a score of " + v + " which is larger than the maximum score of " + maximumScore)
            returnOutput[k] = v

    # nomnom soup magic: scrape the edit form row by row.
    nomnomsoup = BeautifulSoup(html)
    updatePOSTDict = {}
    updateIDDict = {}
    # Compile the row-scraping patterns once, not on every iteration.
    studentNumber = re.compile('input id="(.*)" name=')
    gradesourceNumber = re.compile('type="hidden" value="(.*)">')
    idNumber = re.compile('input name="id(.*)" type="hidden"')
    for x in nomnomsoup.form('input', id=re.compile("^student")):
        # Grabs the student number from the visible input.
        studStr = studentNumber.search(str(x)).group(1).strip()
        # Grabs the gradesource number from the hidden input that follows it.
        # NOTE(review): this revision had dropped the findNext() step that the
        # other two copies of this method perform; without it the hidden-input
        # patterns run against the visible student field — restored to match.
        x = x.findNext("input")
        gradStr = gradesourceNumber.search(str(x)).group(1).strip()
        updatePOSTDict[studStr] = gradStr
        # Grabs the row id number from the same hidden input.
        idString = idNumber.search(str(x))
        updateIDDict[str("id" + idString.group(1).strip())] = gradStr

    # Some inner-join magic — join savedAccount (gradesourceNumber -> email)
    # with scoreDict (email -> score) into (gradesourceNumber -> score).
    joinedDictA = {}
    saveAccount = self.savedAccount
    for key in saveAccount.keys():
        try:
            joinedDictA[key] = scoreDict[saveAccount[key]]
        except KeyError:  # was a blanket `except Exception` — only the dict lookup can fail here
            print(saveAccount[key] + " was found in Gradesource but not in the CSV.")
            continue
def get_file(self, video_id):
    """Resolve the direct .mp4 URL for *video_id* on amonshare.com.

    Submits the site's two interstitial forms (each page's first form, with
    all named inputs echoed back) and then scrapes the final page for the
    video link.

    :param video_id: amonshare file identifier appended to the base URL.
    :returns: absolute URL of the .mp4 file.
    """
    url = "http://amonshare.com/%s" % video_id
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html, from_encoding="latin-1")

    # First interstitial form: re-submit every named input unchanged.
    data = {}
    for elem in soup.form("input"):
        if elem.get("name"):
            data[elem.get("name")] = elem.get("value")
    html = urllib.urlopen(url, urllib.urlencode(data)).read()
    soup = BeautifulSoup(html, from_encoding="latin-1")

    # Second interstitial form, same drill.
    data = {}
    for elem in soup.form("input"):
        if elem.get("name"):
            data[elem.get("name")] = elem.get("value")
    html = urllib.urlopen(url, urllib.urlencode(data)).read()

    # Fix: the dot before "mp4" was unescaped, so it matched any character
    # (e.g. href="...xmp4"); escape it to anchor on a literal ".mp4".
    video_url = re.findall(r'href="([^"]+)\.mp4"', html)[0] + ".mp4"
    return video_url
def get_file(self, video_id):
    """Resolve the direct video URL for *video_id* on filebox.com.

    Re-submits the page's first form (echoing back every named input) and
    scrapes the resulting page for the argument of the this.play('...') call.

    :param video_id: filebox file identifier appended to the base URL.
    :returns: the video URL string.
    """
    url = "http://filebox.com/%s" % video_id
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html, from_encoding='latin-1')

    # Echo back all named inputs of the interstitial form.
    data = dict(
        (field.get('name'), field.get('value'))
        for field in soup.form('input')
        if field.get('name')
    )
    html = urllib.urlopen(url, urllib.urlencode(data)).read()

    # The player is invoked as this.play('<url>') — capture its argument.
    return re.findall("this.play\('([^']+)'\)", html)[0]