def scrape_bid_description(self, bid):
    """Populate ``bid`` from its "Bid Details" table and save it.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and looks up the ``<table>`` whose ``summary`` attribute is
    ``'Bid Details'``.  If that table contains a ``<span>`` with the
    ``BidListHeader`` class whose text starts with "Contact"
    (case-insensitive), the text of the next ``<tr>`` becomes
    ``bid.contact`` and the first e-mail address found in that row becomes
    ``bid.email``.  The text of the whole table is stored as
    ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())
    details = page.find('table', attrs={'summary': 'Bid Details'})

    contact_heading = re.compile(r'^Contact', re.IGNORECASE)

    def is_contact_header(tag):
        # Matches <span class="... BidListHeader ...">Contact...</span>.
        return (tag.name == 'span'
                and 'BidListHeader' in tag.attrs.get('class', [])
                and contact_heading.search(tag.text))

    header = details.find(is_contact_header)
    if header:
        contact_row = header.findNext('tr')
        bid.contact = contact_row.text.strip()

        # email regex
        email_pattern = re.compile(
            r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b', re.IGNORECASE)
        email_text = contact_row.find(text=email_pattern)
        if email_text:
            bid.email = email_pattern.search(email_text).group(0)

    bid.description = get_all_text(details)
    bid.save()
def scrape_bid_description(self, bid):
    """Populate the contact and description of ``bid`` and save it.

    Opens ``bid.url`` through ``self.br`` and parses the response with
    ``soupify``.  When a ``<b>`` element whose text starts with "Agent" is
    present, the text of its enclosing ``<table>`` is stored as
    ``bid.contact``.  The text of the first ``<blockquote>`` is stored as
    ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())

    agent_label = re.compile(r'^Agent')

    def is_agent_heading(tag):
        return tag.name == 'b' and agent_label.search(tag.text)

    heading = page.find(is_agent_heading)
    if heading:
        bid.contact = get_all_text(heading.findParent('table'))

    bid.description = get_all_text(page.find('blockquote'))
    bid.save()
def scrape_bids(self):
    """Rebuild the organisation's bids from its bids page.

    Opens ``self.org.bids_page_url`` through ``self.br``, parses the
    response with ``soupify`` and deletes every bid currently attached to
    ``self.org``.  For each ``<td>`` whose text starts with "Solicitation"
    (leading whitespace allowed), a new ``Bid`` is built from the cells of
    the enclosing row: the first cell supplies the title, the second the
    description, and the third cell from the end is searched with
    ``self.date_regex`` for the due date.  Each bid is saved, with the
    current browser URL as its ``url``.
    """
    self.br.open(self.org.bids_page_url)
    page = soupify(self.br.response().read())

    solicitation = re.compile(r'^\s*Solicitation')

    def is_solicitation_cell(tag):
        return tag.name == 'td' and solicitation.search(tag.text)

    # Existing bids are removed before the rows on the page are re-imported.
    self.org.bid_set.all().delete()

    for marker in page.findAll(is_solicitation_cell):
        cells = marker.findParent('tr').findAll('td')

        bid = Bid(org=self.org)
        bid.title = cells[0].text
        bid.url = self.br.geturl()
        bid.description = get_all_text(cells[1])

        due = re.search(self.date_regex, cells[-3].text)
        if due:
            # The regex groups are taken in month, day, year order.
            month, day, year = due.groups()
            bid.due_date = datetime.date(
                day=int(day), month=int(month), year=int(year))

        bid.save()
def scrape_bid_description(self, bid):
    """Store the text of the document at ``bid.url`` as the description.

    The response body fetched through ``self.br`` is passed through
    ``doctohtml`` and then ``soupify``; the text of the resulting
    ``<body>`` is saved as ``bid.description``.
    """
    self.br.open(bid.url)
    raw_document = self.br.response().read()
    converted = soupify(doctohtml(raw_document))
    body = converted.html.body
    bid.description = get_all_text(body)
    bid.save()
def scrape_bid_description(self, bid):
    """Store the text of the document at ``bid.url`` as the description.

    The response body fetched through ``self.br`` is passed through
    ``pdftohtml`` and then ``soupify``; the text of the resulting
    ``<body>`` is saved as ``bid.description``.
    """
    self.br.open(bid.url)
    raw_document = self.br.response().read()
    converted = soupify(pdftohtml(raw_document))
    body = converted.html.body
    bid.description = get_all_text(body)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the ``bidViewResultsLeft`` cell as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and stores the text of the first ``<td>`` with class
    ``bidViewResultsLeft`` in ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())
    results_cell = page.find('td', attrs={'class': 'bidViewResultsLeft'})
    bid.description = get_all_text(results_cell)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the ``bidViewResultsLeft`` cell as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and stores the text of the first ``<td>`` with class
    ``bidViewResultsLeft`` in ``bid.description``.
    """
    self.br.open(bid.url)
    html = self.br.response().read()
    cell_attrs = {'class': 'bidViewResultsLeft'}
    left_cell = soupify(html).find('td', attrs=cell_attrs)
    bid.description = get_all_text(left_cell)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the ``ms-formtable`` table as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and stores the text of the first ``<table>`` with class
    ``ms-formtable`` in ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())
    form_table = page.find('table', attrs={'class': 'ms-formtable'})
    bid.description = get_all_text(form_table)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the ``content`` div as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and stores the text of the first ``<div>`` with class
    ``content`` in ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())
    content_div = page.find('div', attrs={'class': 'content'})
    bid.description = get_all_text(content_div)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the purchasing-bids table as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and stores the text of the first ``<table>`` whose ``id``
    matches ``PurchasingBids`` in ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())
    table_id = re.compile(r'PurchasingBids')
    bids_table = page.find('table', id=table_id)
    bid.description = get_all_text(bids_table)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the ``ms-formtable`` table as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and stores the text of the first ``<table>`` with class
    ``ms-formtable`` in ``bid.description``.
    """
    self.br.open(bid.url)
    html = self.br.response().read()
    table_attrs = {'class': 'ms-formtable'}
    form_table = soupify(html).find('table', attrs=table_attrs)
    bid.description = get_all_text(form_table)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the ``content`` div as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and stores the text of the first ``<div>`` with class
    ``content`` in ``bid.description``.
    """
    self.br.open(bid.url)
    html = self.br.response().read()
    div_attrs = {'class': 'content'}
    content_div = soupify(html).find('div', attrs=div_attrs)
    bid.description = get_all_text(content_div)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the table around the page's ``<h1>`` as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify``, finds the first ``<h1>`` and stores the text of its
    enclosing ``<table>`` in ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())
    heading = page.find('h1')
    enclosing_table = heading.findParent('table')
    bid.description = get_all_text(enclosing_table)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the table around the page's ``<h1>`` as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify``, finds the first ``<h1>`` and stores the text of its
    enclosing ``<table>`` in ``bid.description``.
    """
    self.br.open(bid.url)
    html = self.br.response().read()
    title_tag = soupify(html).find('h1')
    bid.description = get_all_text(title_tag.findParent('table'))
    bid.save()
def scrape_bid_description(self, bid):
    """Populate contact and description of ``bid`` from its page and PDF.

    Opens ``bid.url`` through ``self.br`` and parses the response with
    ``soupify``.  If a text node starting with "Contact:" exists, the text
    of its enclosing ``<p>`` becomes ``bid.contact``.  The link whose text
    is exactly "Download Bid Package" is then followed (resolved against
    the current browser URL), its response is converted with ``pdftohtml``
    and the text of the resulting ``<body>`` is saved as
    ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())

    contact_text = page.find(text=re.compile(r'^Contact:'))
    if contact_text:
        bid.contact = get_all_text(contact_text.findParent('p'))

    def is_package_link(tag):
        return tag.name == 'a' and tag.text == 'Download Bid Package'

    link = page.find(is_package_link)
    package_url = urlparse.urljoin(self.br.geturl(), link['href'])

    self.br.open(package_url)
    package = soupify(pdftohtml(self.br.response().read()))
    bid.description = get_all_text(package.html.body)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the bid's linked document as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and follows the first ``<a>`` whose ``href`` ends in
    ``showdocument?id=<digits>`` (resolved against the current browser
    URL).  That response is converted with ``pdftohtml`` and the text of
    the resulting ``<body>`` is stored in ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())

    document_href = re.compile(r'showdocument\?id=\d+$')
    link = page.find('a', href=document_href)
    document_url = urlparse.urljoin(self.br.geturl(), link['href'])

    self.br.open(document_url)
    document = soupify(pdftohtml(self.br.response().read()))
    bid.description = get_all_text(document.html.body)
    bid.save()
def scrape_bid_description(self, bid):
    """Save the text of the bid's linked document as the description.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and follows the first ``<a>`` whose ``href`` ends in
    ``showdocument?id=<digits>`` (resolved against the current browser
    URL).  That response is converted with ``pdftohtml`` and the text of
    the resulting ``<body>`` is stored in ``bid.description``.
    """
    self.br.open(bid.url)
    listing = soupify(self.br.response().read())

    anchor = listing.find('a', href=re.compile(r'showdocument\?id=\d+$'))
    target = urlparse.urljoin(self.br.geturl(), anchor['href'])

    self.br.open(target)
    pdf_bytes = self.br.response().read()
    pdf_page = soupify(pdftohtml(pdf_bytes))
    bid.description = get_all_text(pdf_page.html.body)
    bid.save()
def scrape_bid_description(self, bid):
    """Populate contact, e-mail and description of ``bid`` and save it.

    Opens ``bid.url`` through ``self.br`` and parses the response with
    ``soupify``.  The text of the ``<section class="promo">`` element, when
    present, becomes ``bid.contact`` and the first match of
    ``self.email_regex`` inside it becomes ``bid.email``.  The text of the
    ``<div class="body-content">`` element is stored as
    ``bid.description``.
    """
    self.br.open(bid.url)
    page = soupify(self.br.response().read())

    promo = page.find('section', attrs={'class': 'promo'})
    # The contact section is treated as optional, as in the sibling
    # scrapers: a page without it still gets its description saved instead
    # of failing on ``None.text``.
    if promo is not None:
        contact_text = promo.text
        bid.contact = contact_text
        match = re.search(self.email_regex, contact_text)
        if match:
            bid.email = match.group(0)

    content = page.find('div', attrs={'class': 'body-content'})
    # Fix: the text was previously assigned to ``bid.desc``; every other
    # scraper in this file stores it on ``bid.description``.
    bid.description = get_all_text(content)
    bid.save()
def scrape_bids(self):
    """Rebuild the organisation's bids from its bids page.

    Opens ``self.org.bids_page_url`` through ``self.br``, parses the
    response with ``soupify`` and deletes every bid currently attached to
    ``self.org``.  For each ``<td>`` whose text starts with "Solicitation"
    (leading whitespace allowed), a new ``Bid`` is built from the cells of
    the enclosing row: the first cell supplies the title, the second the
    description, and the third cell from the end is searched with
    ``self.date_regex`` for the due date.  Each bid is saved, with the
    current browser URL as its ``url``.
    """
    self.br.open(self.org.bids_page_url)
    listing = soupify(self.br.response().read())

    label = re.compile(r'^\s*Solicitation')

    def starts_with_solicitation(tag):
        return tag.name == 'td' and label.search(tag.text)

    # Old bids are cleared first; the loop below recreates them.
    self.org.bid_set.all().delete()

    for anchor_cell in listing.findAll(starts_with_solicitation):
        row = anchor_cell.findParent('tr')
        columns = row.findAll('td')

        bid = Bid(org=self.org)
        bid.title = columns[0].text
        bid.url = self.br.geturl()
        bid.description = get_all_text(columns[1])

        found = re.search(self.date_regex, columns[-3].text)
        if found:
            # Groups are unpacked as month, day, year.
            mm, dd, yyyy = found.groups()
            bid.due_date = datetime.date(
                day=int(dd), month=int(mm), year=int(yyyy))

        bid.save()
def scrape_bid_description(self, bid):
    """Populate ``bid`` from its "Bid Details" table and save it.

    Opens ``bid.url`` through ``self.br``, parses the response with
    ``soupify`` and looks up the ``<table>`` whose ``summary`` attribute is
    ``'Bid Details'``.  If that table contains a ``<span>`` with the
    ``BidListHeader`` class whose text starts with "Contact"
    (case-insensitive), the text of the next ``<tr>`` becomes
    ``bid.contact`` and the first e-mail address found in that row becomes
    ``bid.email``.  The text of the whole table is stored as
    ``bid.description``.
    """
    self.br.open(bid.url)
    html = self.br.response().read()
    bid_table = soupify(html).find('table', attrs={'summary': 'Bid Details'})

    starts_with_contact = re.compile(r'^Contact', re.IGNORECASE)

    def is_contact_span(node):
        # Matches <span class="... BidListHeader ...">Contact...</span>.
        if node.name != 'span':
            return False
        if 'BidListHeader' not in node.attrs.get('class', []):
            return False
        return starts_with_contact.search(node.text)

    contact_span = bid_table.find(is_contact_span)
    if contact_span:
        row = contact_span.findNext('tr')
        bid.contact = row.text.strip()

        # email regex
        address = re.compile(
            r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b', re.IGNORECASE)
        text_with_address = row.find(text=address)
        if text_with_address:
            hit = address.search(text_with_address)
            bid.email = hit.group(0)

    bid.description = get_all_text(bid_table)
    bid.save()