def _download(self, urls, output: str):
    for url in urls:
        r = self._request_get(url)
        info = self._get_info(url, r)
        os.makedirs(output, exist_ok=True)
        info['name_file'] = os.path.join(output, info['name_file'])
        dl(info['download_url'], info['name_file'],
           progressbar=self._progress_bar,
           verbose=self._verbose,
           replace=self._replace)
def _extract_info(self, url, download=True):
    r = self._request_get(url)
    info = self._get_info(url, r)
    if download:
        dl(info['download_url'], info['name_file'],
           progressbar=self._progress_bar,
           verbose=self._verbose,
           replace=self._replace)
        return info
    else:
        return info
def sync():
    cleanup()
    client = caldav.DAVClient(url, username=username, password=password)
    principal = client.principal()
    calendars = principal.calendars()
    print("Discovering calendars...")
    if len(calendars) > 0:
        calendar = choose_cal(calendars)
        print("Synchronizing Calendar....")
        for event in calendar.events():
            event_url = str(event)
            event_url = re.sub('^Event: ', '', event_url)
            dl(event_url, username, password)
        print("Generating overview....")
        overview_page = parse()
    print("Done!")
def get_good_page(self, url):
    d = dl(url)
    content = d.get()
    if content is not None:
        soup = bfs(content)
        count = soup.findAll('div', {'id': 'resultCount'})[0].contents[0]
        string = re.search(r'\d*,\d{3}', count).group()
        string = re.sub(',', '', string)
        page = int(int(string) / 24) + 1
        return page
    return 1
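# Illustration only (not part of the original code): a minimal, self-contained
# sketch of the page-count parsing used by get_good_page() above, assuming a
# hypothetical resultCount string and roughly 24 products per page.
import re

count = '1,234 results'                          # hypothetical resultCount text
string = re.search(r'\d*,\d{3}', count).group()  # -> '1,234'
string = re.sub(',', '', string)                 # -> '1234'
page = int(int(string) / 24) + 1                 # int(1234 / 24) + 1 -> 52
print(page)                                      # prints 52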
def _download(self, urls, folder=None):
    for url in urls:
        try:
            check_valid_zippyshare_url(url)
        except InvalidURL as e:
            self._logger_error(str(e))
            raise e
        r = self._request_get(url)
        if r.status_code != 200:
            self._logger_error('Zippyshare sent %s code' % (r.status_code))
        info = self._get_info(url, r)
        self._logger_info('Downloading "%s"' % (info['name_file']))
        # Download into the given folder if one was supplied, otherwise
        # into the current working directory.
        if folder is not None and isinstance(folder, str):
            self._logger_info(
                f'Using directory "{os.path.join(os.getcwd(), folder)}"')
            path = os.path.join(os.getcwd(), folder, info['name_file'])
        else:
            path = info['name_file']
        dl(info['download_url'], path,
           progressbar=self._progress_bar,
           verbose=self._verbose,
           replace=self._replace)
def _extract_info(self, url, download=True, folder=None, custom_filename=None):
    try:
        check_valid_zippyshare_url(url)
    except InvalidURL as e:
        self._logger_error(str(e))
        raise e
    r = self._request_get(url)
    if r.status_code != 200:
        self._logger_error('Zippyshare sent %s code' % (r.status_code))
    info = self._get_info(url, r)
    if download:
        self._logger_info('Downloading "%s"' % (info['name_file']))
        # Build the destination path from the optional folder and filename.
        if folder is not None and isinstance(folder, str):
            self._logger_info(
                f'Using directory "{os.path.join(os.getcwd(), folder)}"')
            if custom_filename is not None and isinstance(custom_filename, str):
                self._logger_info('Using custom filename "%s"' % (custom_filename))
                path = os.path.join(os.getcwd(), folder, custom_filename)
            else:
                path = os.path.join(os.getcwd(), folder, info['name_file'])
        else:
            path = info['name_file']
        dl(info['download_url'], path,
           progressbar=self._progress_bar,
           verbose=self._verbose,
           replace=self._replace)
        return info
    else:
        return info
def get_good_list(self):
    # 1. get the page count
    url = self.baseurl + self.url
    page = self.get_good_page(url)
    # fetch the good list page by page
    for i in range(1, page):
        good_list = []
        fetch_url = url + '&page=' + str(i)
        print(fetch_url)
        d = dl(fetch_url)
        content = d.get()
        if content is not None:
            soup = bfs(content)
            # search the product entries on this page
            product1 = soup.findAll('div', {'class': re.compile(r'result [\S ]*product')})
            num = len(product1)
            # insert each product id into the database
            d = db.db()
            for p_id in range(num):
                product_id = product1[p_id].attrs[2][1]
                good_list.append(product_id)
                d.query("insert into joyo value ('" + product_id + "')")
    return True
def main():
    url = 'http://www.163.com'
    d = dl(url)
    print(d.get())