def download(self):
    """Collect the roll links from every listing page, then download them.

    ``CSV_HEADER`` is appended to ``OUTPUT_FILE`` first.  For each entry in
    ``self.rolls`` the page at ``entry['url']`` is fetched and kept under
    ``entry['html']``; the anchors inside its ``<table class="styletblfont">``
    element are turned into ``Roll`` objects.  An anchor is ignored when its
    text does not split on ``'-'`` into exactly two parts.  The rolls, ordered
    by their ``key`` attribute, are stored under ``entry['data']``.  After
    every page has been parsed, ``download()`` is called on each roll.
    """
    # The CSV header goes out before any row is produced.
    append_csv(OUTPUT_FILE, CSV_HEADER)

    # Pass 1: parse every listing page into Roll objects.
    table_attrs = {'class': 'styletblfont'}
    for entry in self.rolls:
        page = urlget(entry['url'])
        assert page
        entry['html'] = page
        root = baseurl(entry['url'])
        document = BeautifulSoup(page, 'lxml')
        anchors = document.find('table', table_attrs).find_all('a')

        found = []
        for anchor in anchors:
            target = urljoin(root, anchor.get('href'))
            try:
                num, name = [part.strip() for part in anchor.text.split('-')]
            except (ValueError, TypeError):
                # Text is not of the "<num> - <name>" shape; not a roll link.
                continue
            found.append(Roll(num, name, target, entry['lang']))

        found.sort(key=lambda roll: roll.key)
        entry['data'] = found

    # Pass 2: download every roll collected above.
    for entry in self.rolls:
        for roll in entry['data']:
            roll.download()
def download(self):
    """Download this roll's file and append a result row to the CSV.

    ``urldown`` is called with ``self.url`` and ``self.outdir`` and its
    return value is stored in ``self.file``.  An ``AssertionError`` raised by
    ``urldown`` is treated as a failed download: ``self.file`` is set to
    ``None`` and the row carries a placeholder text instead of a path.
    The row is appended to ``OUTPUT_FILE`` in either case.
    """
    try:
        self.file = urldown(url=self.url, dest=self.outdir)
    except AssertionError:
        # Record the failure explicitly instead of relying on whatever value
        # (if any) ``self.file`` held before this call; otherwise the row
        # below could raise AttributeError or report a stale path.
        self.file = None

    if self.file is not None:
        location = relpath(self.file)
    else:
        location = 'Not available / Unable to download'

    row = (self.ac_num, self.ac_name, self.station_num, self.station_name,
           self.lang, location)
    append_csv(OUTPUT_FILE, row)
def download(self):
    """Fetch both language PDFs for this station and record the result.

    Logs a progress message, calls ``__fetch_pdf`` for ``'telugu'`` and then
    ``'english'``, appends one row to the tracker's output file and finally
    marks the station as done on the tracker.  A language whose file
    attribute is ``None`` is reported as ``DOWNLOAD_FAILED`` in the row.
    """
    def shown(path):
        # Path relative form for a fetched file, failure marker otherwise.
        return relpath(path) if path is not None else DOWNLOAD_FAILED

    logger.warning(
        'Processing: Station "%s-%s", AC "%s", District "%s"...'
        % (self.num, self.name, self.ac_name, self.dist_name))

    for language in ('telugu', 'english'):
        self.__fetch_pdf(language)

    row = (
        self.dist_name,
        self.ac_name,
        self.num,
        self.name,
        self.location,
        shown(self.telugu_file),
        shown(self.english_file),
    )
    tracker = self.session.track
    append_csv(tracker.output, row)
    tracker.set_done_station(self.num)
def download(self):
    """Discover the districts on the landing page and process each one.

    Writes ``CSV_HEADER`` to the tracker's output file when that file does
    not exist yet, sets the tracker's current step to 0 and loads the page
    through ``self.session.get()`` (kept on ``self.soup``).  The options of
    the ``<select id="ddlDist">`` element are then walked, skipping the first
    one (presumably a placeholder entry - confirm against the page).

    A district is skipped when ``ASSIGNED_DISTRICTS`` is non-empty and does
    not contain its number, or when its number is not greater than the value
    returned by the tracker's ``get_done_dist()``.  Every other district is
    handed to ``District(...)``; the instance is not kept, so the work is
    presumably driven by its constructor.  The tracker is marked done at the
    end.

    Raises:
        ExitRequested: when the select element is missing or holds fewer
            than two options.
    """
    if not os.path.isfile(self.session.track.output):
        append_csv(self.session.track.output, CSV_HEADER)

    logger.warning('Finding Districts...')
    self.session.track.set_cur_step(0)
    self.soup = self.session.get()

    district_select = self.soup.find('select', {'id': 'ddlDist'})
    if district_select is None:
        logger.warning('Could not parse Districts in response!')
        raise ExitRequested

    entries = district_select.find_all('option')
    if len(entries) < 2:
        logger.warning('No District found!')
        raise ExitRequested

    total = len(entries) - 1
    logger.warning('Found %s Districts.' % total)

    last_done = self.session.track.get_done_dist()
    for entry in entries[1:]:
        dist_num = entry.get('value')
        dist_name = entry.text.strip()
        dist_id = int(dist_num)

        if ASSIGNED_DISTRICTS and dist_id not in ASSIGNED_DISTRICTS:
            logger.warning('Skipped District "%s" (Not Assigned)' % dist_name)
        elif dist_id <= last_done:
            logger.warning('Skipped District "%s" (Done Already)' % dist_name)
        else:
            District(num=dist_num, name=dist_name, session=self.session)

    logger.warning('Completed successfully.')
    self.session.track.set_done()
def download(self):
    """Download every poll PDF listed by the JavaScript index.

    The script at ``self.js_url`` is fetched and stored on
    ``self.js_content``; if ``urlget`` raises ``AssertionError`` the method
    returns without doing anything else.  ``self.parse_js()`` is expected to
    yield ``((ac_num, ac_name), polls)`` items.  For each poll number from 1
    to ``polls`` the PDF URL from ``self.get_pdf_url`` is downloaded, and a
    row is appended both to ``self.rolls`` and to ``OUTPUT_FILE``.  A failed
    download (``AssertionError`` from ``urldown``) is reported with a
    placeholder text instead of a path.
    """
    try:
        script = urlget(self.js_url)
    except AssertionError:
        return
    self.js_content = script

    for (ac_num, ac_name), poll_count in self.parse_js():
        # Poll numbers are 1-based.
        for poll_num in range(1, poll_count + 1):
            pdf_url = self.get_pdf_url(ac_num, poll_num)
            try:
                saved = urldown(url=pdf_url, dest=getpath(TRIPURA_PDF_DIR))
            except AssertionError:
                saved = None

            row = (
                self.name,
                str(ac_num),
                ac_name,
                str(poll_num),
                'Not available / Unable to download' if saved is None
                else relpath(saved),
            )
            self.rolls.append(row)
            append_csv(OUTPUT_FILE, row)
def download(self):
    """Write the CSV header, then download every district in turn.

    ``CSV_HEADER`` is appended to ``OUTPUT_FILE``, after which ``download()``
    is called on each element of ``self.districts`` in iteration order.
    """
    # Header first, so that rows appended by the districts follow it.
    append_csv(OUTPUT_FILE, CSV_HEADER)

    for entry in self.districts:
        entry.download()