Python get_all_text 예제들, bidmap.htmlparse.soupify.get_all_text Python 예제들

예제 #1

0

파일 보기

파일: bidsaspx.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Scrape contact, email and description for *bid* from its page.

        Opens ``bid.url`` with ``self.br`` and looks up the table whose
        ``summary`` attribute is ``Bid Details``. Inside it, a ``<span>``
        with class ``BidListHeader`` whose text starts with "Contact"
        (case-insensitive) marks the contact section: the text of the next
        ``<tr>`` becomes ``bid.contact`` and, if that row holds an email
        address, it becomes ``bid.email``. The table's text (via
        ``get_all_text``) is stored as ``bid.description`` and the bid saved.

        NOTE(review): a page without the "Bid Details" table presumably
        makes ``find`` return None, in which case the header lookup fails.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        details = page.find('table', attrs={'summary': 'Bid Details'})

        contact_re = re.compile(r'^Contact', re.IGNORECASE)

        def is_contact_header(tag):
            # Same three-part test as a filter function for ``find``.
            return (tag.name == 'span'
                    and 'BidListHeader' in tag.attrs.get('class', [])
                    and contact_re.search(tag.text))

        header = details.find(is_contact_header)

        if header:
            contact_row = header.findNext('tr')
            bid.contact = contact_row.text.strip()

            # email regex
            email_re = re.compile(
                r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b',
                re.IGNORECASE)
            email_text = contact_row.find(text=email_re)

            if email_text:
                bid.email = email_re.search(email_text).group(0)

        bid.description = get_all_text(details)
        bid.save()

예제 #2

0

파일 보기

    def scrape_bid_description(self, bid):
        """Scrape the agent contact block and description for *bid*.

        Opens ``bid.url`` with ``self.br``. If the page has a ``<b>`` tag
        whose text starts with "Agent", the text of its enclosing table is
        stored as ``bid.contact``. The text of the page's ``<blockquote>``
        (via ``get_all_text``) is stored as ``bid.description`` and the bid
        is saved.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        agent_re = re.compile(r'^Agent')

        def is_agent_label(tag):
            return tag.name == 'b' and agent_re.search(tag.text)

        agent_label = page.find(is_agent_label)

        if agent_label:
            bid.contact = get_all_text(agent_label.findParent('table'))

        quote = page.find('blockquote')

        bid.description = get_all_text(quote)
        bid.save()

예제 #3

0

파일 보기

파일: saint-marys.py 프로젝트: thayton/bidmap

    def scrape_bids(self):
        """Rebuild this organisation's bids from its bids page.

        Opens ``self.org.bids_page_url``, deletes every bid currently in
        ``self.org.bid_set``, then creates one ``Bid`` for each ``<td>``
        whose text starts (after optional whitespace) with "Solicitation".
        For each such cell, the cells of its enclosing row supply the data:
        the first cell's text is the title, the second cell's text (via
        ``get_all_text``) the description, and the third-from-last cell is
        searched with ``self.date_regex`` for the due date.

        NOTE(review): ``self.date_regex`` is assumed to capture
        (month, day, year) in that order -- confirm against its definition.
        """
        self.br.open(self.org.bids_page_url)

        page = soupify(self.br.response().read())
        solicitation_re = re.compile(r'^\s*Solicitation')

        def is_solicitation_cell(tag):
            return tag.name == 'td' and solicitation_re.search(tag.text)

        # Existing bids are dropped before the page is re-scraped.
        self.org.bid_set.all().delete()

        for label_cell in page.findAll(is_solicitation_cell):
            cells = label_cell.findParent('tr').findAll('td')

            bid = Bid(org=self.org)
            bid.title = cells[0].text
            bid.url = self.br.geturl()
            bid.description = get_all_text(cells[1])

            due = re.search(self.date_regex, cells[-3].text)
            if due:
                month, day, year = due.groups()
                bid.due_date = datetime.date(day=int(day),
                                             month=int(month),
                                             year=int(year))

            bid.save()

예제 #4

0

파일 보기

파일: docscraper.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Download the document at ``bid.url`` and save its text on *bid*.

        The raw response is passed through ``doctohtml`` and parsed with
        ``soupify``; the text of the resulting ``<body>`` (via
        ``get_all_text``) becomes ``bid.description`` before the bid is saved.
        """
        self.br.open(bid.url)

        raw_document = self.br.response().read()
        body = soupify(doctohtml(raw_document)).html.body

        bid.description = get_all_text(body)
        bid.save()

예제 #5

0

파일 보기

파일: pdfscraper.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Download the PDF at ``bid.url`` and save its text on *bid*.

        The raw response is passed through ``pdftohtml`` and parsed with
        ``soupify``; the text of the resulting ``<body>`` (via
        ``get_all_text``) becomes ``bid.description`` before the bid is saved.
        """
        self.br.open(bid.url)

        raw_pdf = self.br.response().read()
        body = soupify(pdftohtml(raw_pdf)).html.body

        bid.description = get_all_text(body)
        bid.save()

예제 #6

0

파일 보기

파일: bidview.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Store the text of the bid-view results cell as the description.

        Opens ``bid.url`` with ``self.br``, finds the ``<td>`` with class
        ``bidViewResultsLeft`` and saves its text (via ``get_all_text``) on
        ``bid.description``.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        results_cell = page.find('td', attrs={'class': 'bidViewResultsLeft'})

        bid.description = get_all_text(results_cell)
        bid.save()

예제 #7

0

파일 보기

    def scrape_bid_description(self, bid):
        """Populate ``bid.description`` from the bid's detail page.

        The page at ``bid.url`` is fetched through ``self.br``; the ``<td>``
        carrying class ``bidViewResultsLeft`` is handed to ``get_all_text``
        and the result is saved on the bid.
        """
        self.br.open(bid.url)

        html = self.br.response().read()
        left_cell = soupify(html).find(
            'td', attrs={'class': 'bidViewResultsLeft'})

        bid.description = get_all_text(left_cell)
        bid.save()

예제 #8

0

파일 보기

    def scrape_bid_description(self, bid):
        """Store the text of the page's form table as the description.

        Opens ``bid.url`` with ``self.br``, finds the ``<table>`` with class
        ``ms-formtable`` and saves its text (via ``get_all_text``) on
        ``bid.description``.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        form_table = page.find('table', attrs={'class': 'ms-formtable'})

        bid.description = get_all_text(form_table)
        bid.save()

예제 #9

0

파일 보기

    def scrape_bid_description(self, bid):
        """Store the text of the page's content div as the description.

        Opens ``bid.url`` with ``self.br``, finds the ``<div>`` with class
        ``content`` and saves its text (via ``get_all_text``) on
        ``bid.description``.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        content_div = page.find('div', attrs={'class': 'content'})

        bid.description = get_all_text(content_div)
        bid.save()

예제 #10

0

파일 보기

파일: mariettaga.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Store the text of the purchasing-bids table as the description.

        Opens ``bid.url`` with ``self.br``, finds the ``<table>`` whose
        ``id`` matches the pattern ``PurchasingBids`` and saves its text
        (via ``get_all_text``) on ``bid.description``.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        table_id_re = re.compile(r'PurchasingBids')
        bids_table = page.find('table', id=table_id_re)

        bid.description = get_all_text(bids_table)
        bid.save()

예제 #11

0

파일 보기

파일: cityofcovington.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Populate ``bid.description`` from the bid's detail page.

        The page at ``bid.url`` is fetched through ``self.br``; the
        ``<table>`` carrying class ``ms-formtable`` is handed to
        ``get_all_text`` and the result is saved on the bid.
        """
        self.br.open(bid.url)

        html = self.br.response().read()
        details_table = soupify(html).find(
            'table', attrs={'class': 'ms-formtable'})

        bid.description = get_all_text(details_table)
        bid.save()

예제 #12

0

파일 보기

파일: cityofwinder.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Populate ``bid.description`` from the bid's detail page.

        The page at ``bid.url`` is fetched through ``self.br``; the ``<div>``
        carrying class ``content`` is handed to ``get_all_text`` and the
        result is saved on the bid.
        """
        self.br.open(bid.url)

        html = self.br.response().read()
        main_div = soupify(html).find('div', attrs={'class': 'content'})

        bid.description = get_all_text(main_div)
        bid.save()

예제 #13

0

파일 보기

파일: pgebid.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Store the text of the table enclosing the page heading.

        Opens ``bid.url`` with ``self.br``, finds the page's ``<h1>``, takes
        the ``<table>`` that contains it and saves that table's text (via
        ``get_all_text``) on ``bid.description``.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        enclosing_table = page.find('h1').findParent('table')

        bid.description = get_all_text(enclosing_table)
        bid.save()

예제 #14

0

파일 보기

    def scrape_bid_description(self, bid):
        """Populate ``bid.description`` from the table around the ``<h1>``.

        The page at ``bid.url`` is fetched through ``self.br``; the
        ``<table>`` that is a parent of the ``<h1>`` heading is handed to
        ``get_all_text`` and the result is saved on the bid.
        """
        self.br.open(bid.url)

        html = self.br.response().read()
        heading = soupify(html).find('h1')
        heading_table = heading.findParent('table')

        bid.description = get_all_text(heading_table)
        bid.save()

예제 #15

0

파일 보기

파일: alpharetta.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Scrape the contact paragraph and the PDF bid package for *bid*.

        Opens ``bid.url`` with ``self.br``. If the page has a text node
        starting with "Contact:", the text of its enclosing ``<p>`` is stored
        as ``bid.contact``. The link whose text is exactly
        "Download Bid Package" is then followed (resolved against the
        current URL); the downloaded document is converted with
        ``pdftohtml`` and the text of its ``<body>`` becomes
        ``bid.description``. Finally the bid is saved.

        NOTE(review): when the download link is absent, ``find`` presumably
        returns None and the ``['href']`` lookup fails.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        contact_label = page.find(text=re.compile(r'^Contact:'))

        if contact_label:
            bid.contact = get_all_text(contact_label.findParent('p'))

        def is_package_link(tag):
            return tag.name == 'a' and tag.text == 'Download Bid Package'

        package_link = page.find(is_package_link)
        package_url = urlparse.urljoin(self.br.geturl(), package_link['href'])

        self.br.open(package_url)

        raw_pdf = self.br.response().read()
        pdf_page = soupify(pdftohtml(raw_pdf))

        bid.description = get_all_text(pdf_page.html.body)
        bid.save()

예제 #16

0

파일 보기

파일: mcdonoughga.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Follow the bid's document link and store the PDF text.

        Opens ``bid.url`` with ``self.br`` and finds the ``<a>`` whose
        ``href`` ends with ``showdocument?id=<digits>``. That link is
        resolved against the current URL and opened; the response is
        converted with ``pdftohtml`` and the text of its ``<body>`` (via
        ``get_all_text``) becomes ``bid.description`` before the bid is saved.

        NOTE(review): when no such link exists, ``find`` presumably returns
        None and the ``['href']`` lookup fails.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        document_href_re = re.compile(r'showdocument\?id=\d+$')
        document_link = page.find('a', href=document_href_re)
        document_url = urlparse.urljoin(self.br.geturl(),
                                        document_link['href'])

        self.br.open(document_url)

        raw_pdf = self.br.response().read()
        body = soupify(pdftohtml(raw_pdf)).html.body

        bid.description = get_all_text(body)
        bid.save()

예제 #17

0

파일 보기

파일: mcdonoughga.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Populate ``bid.description`` from the PDF linked on the bid page.

        The page at ``bid.url`` is fetched through ``self.br`` and searched
        for an ``<a>`` whose ``href`` matches ``showdocument\\?id=\\d+$``.
        The link target, joined with the current URL, is opened; its content
        goes through ``pdftohtml`` and ``soupify``, and the ``<body>`` text
        from ``get_all_text`` is saved on the bid.

        NOTE(review): a page lacking the link presumably yields None from
        ``find``, making the ``['href']`` subscript fail.
        """
        self.br.open(bid.url)

        listing = soupify(self.br.response().read())
        anchor = listing.find('a', href=re.compile(r'showdocument\?id=\d+$'))
        target = urlparse.urljoin(self.br.geturl(), anchor['href'])

        self.br.open(target)

        pdf_bytes = self.br.response().read()
        converted = soupify(pdftohtml(pdf_bytes))

        bid.description = get_all_text(converted.html.body)
        bid.save()

예제 #18

0

파일 보기

    def scrape_bid_description(self, bid):
        """Scrape contact, email and description for *bid* from its page.

        Opens ``bid.url`` with ``self.br``. The text of the ``<section>``
        with class ``promo`` is stored as ``bid.contact``; if
        ``self.email_regex`` matches within that text, the match is stored
        as ``bid.email``. The text of the ``<div>`` with class
        ``body-content`` (via ``get_all_text``) is stored as
        ``bid.description`` and the bid is saved.

        NOTE(review): a page without the ``promo`` section presumably makes
        ``find`` return None, in which case reading ``.text`` fails.
        """
        self.br.open(bid.url)

        page = soupify(self.br.response().read())
        promo = page.find('section', attrs={'class': 'promo'})

        contact_text = promo.text
        bid.contact = contact_text

        email_match = re.search(self.email_regex, contact_text)
        if email_match:
            bid.email = email_match.group(0)

        content = page.find('div', attrs={'class': 'body-content'})

        # Assign to `description`, not `desc`: `description` is the attribute
        # the other bid scrapers populate, so writing to `desc` would leave
        # the real description field unset when the bid is saved.
        bid.description = get_all_text(content)
        bid.save()

예제 #19

0

파일 보기

파일: saint-marys.py 프로젝트: thayton/bidmap

    def scrape_bids(self):
        """Replace the organisation's stored bids with those on its page.

        Fetches ``self.org.bids_page_url`` through ``self.br``, removes all
        bids in ``self.org.bid_set``, and then saves a new ``Bid`` per table
        cell whose text begins (ignoring leading whitespace) with
        "Solicitation". The row containing that cell provides the fields:
        cell 0 -> title, cell 1 -> description (via ``get_all_text``),
        third-from-last cell -> due date when ``self.date_regex`` matches.

        NOTE(review): the three groups of ``self.date_regex`` are unpacked
        as month, day, year -- verify against the regex definition.
        """
        self.br.open(self.org.bids_page_url)

        listing = soupify(self.br.response().read())
        heading_re = re.compile(r'^\s*Solicitation')

        def starts_with_solicitation(node):
            return node.name == 'td' and heading_re.search(node.text)

        # Wipe previously stored bids; they are recreated from the page.
        self.org.bid_set.all().delete()

        for marker in listing.findAll(starts_with_solicitation):
            row = marker.findParent('tr')
            columns = row.findAll('td')

            bid = Bid(org=self.org)
            bid.title = columns[0].text
            bid.url = self.br.geturl()
            bid.description = get_all_text(columns[1])

            date_match = re.search(self.date_regex, columns[-3].text)
            if date_match:
                mm, dd, yyyy = date_match.groups()
                bid.due_date = datetime.date(
                    day=int(dd), month=int(mm), year=int(yyyy))

            bid.save()

예제 #20

0

파일 보기

파일: bidsaspx.py 프로젝트: thayton/bidmap

    def scrape_bid_description(self, bid):
        """Populate contact, email and description of *bid* from its page.

        The page at ``bid.url`` is fetched through ``self.br`` and its
        "Bid Details" table (matched on the ``summary`` attribute) located.
        Within the table, the first ``<span class="BidListHeader">`` whose
        text begins with "Contact" (any case) is used as an anchor: the
        following ``<tr>`` gives ``bid.contact`` (stripped text) and, when
        it contains an email address, ``bid.email``. ``bid.description`` is
        the table's text from ``get_all_text``; the bid is then saved.

        NOTE(review): if the table is missing, ``find`` presumably returns
        None and the subsequent ``.find`` call fails.
        """
        self.br.open(bid.url)

        soup = soupify(self.br.response().read())
        bid_table = soup.find('table', attrs={'summary': 'Bid Details'})

        heading_re = re.compile(r'^Contact', re.IGNORECASE)

        def marks_contact_section(node):
            if node.name != 'span':
                return False
            if 'BidListHeader' not in node.attrs.get('class', []):
                return False
            return heading_re.search(node.text)

        anchor = bid_table.find(marks_contact_section)

        if anchor:
            row = anchor.findNext('tr')
            bid.contact = row.text.strip()

            # email regex
            address_re = re.compile(
                r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b', re.IGNORECASE)
            address_node = row.find(text=address_re)

            if address_node:
                found = address_re.search(address_node)
                bid.email = found.group(0)

        bid.description = get_all_text(bid_table)
        bid.save()