def get_post_list(self, url, items):
    xml = Download(url).request()
    if xml is False:
        return
    lis = xml.xpath('//div[@class="section list"][1]/ul/li')
    for li in lis:
        a = li.find("a")
        span = li.find("span")
        if self.url_repeat(a.get("href")) is False:
            item = GDWJWItem()
            item.set_info({
                "title": a.get("title"),
                "sourceUrl": a.get("href"),
                "_id": generate_hash("{}{}".format(a.get("title"), span.text)),
                "agency": "广东省卫健委",
                "date": span.text,
                "effective": True
            })
            items.append(item)
class Gui:
    def __init__(self):
        """
        Downloads images and saves them into the "images" folder,
        shows them one by one in a tkinter window, and then deletes
        the folder after all images have been shown.
        """
        self.window = tk.Tk()
        pad = 3
        # Sets the window (almost) fullscreen
        self.window.geometry("{0}x{1}+0+0".format(
            self.window.winfo_screenwidth() - pad,
            self.window.winfo_screenheight() - pad))
        self.label = tk.Label()
        self.label.pack()
        self.Reddit = Reddit()
        self.Download = Download()
        links = self.Reddit.reddit()
        for link in links:
            self.Download.download(link)
        files = os.listdir("images")  # All filenames in the images folder
        counter = 0
        for file in files:
            if counter != 0:
                time.sleep(10)
            photo = ImageTk.PhotoImage(Image.open("images/{}".format(file)))
            self.label.configure(image=photo)
            self.label.image = photo
            self.window.update_idletasks()
            counter = 1
        self.Download.delete_folder()
def __init__(self, keyword, hostname, username, password, schema, tablename, search_type=True):
    """
    Initialize the WeChat crawler (mainly set up the local db) and take some key params.
    :param keyword: the search keywords
    :param search_type: the search method: by type (True) or by author (False)
    """
    Download.__init__(self)
    self.query = keyword
    self.search_type = search_type
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    # database connection
    self.db = pymysql.connect(str(hostname), str(username), str(password), str(schema))
    self.tablename = tablename
    cursor = self.db.cursor()
    print('已连接上数据库')
    # login
    account = input("pls enter ur wechat account: ")
    pwd = input("pls enter ur wechat password: ")
    generator = Generator(account=account, password=pwd)
    generator.generate()
    print('扫码成功,数据生成结束')
def get_post(self, item):
    if item.get_info("sourceUrl").split(".")[-1] == "pdf":
        return
    xml = Download(item.get_info("sourceUrl")).request()
    if xml is False:
        return
    try:
        source_date = xml.xpath('//div[@class="xxxq_text_tit"][1]/h6/span[2]')[0]
        source_date = ["深圳市卫生健康委员会", source_date.text.replace("发布日期:", "")]
    except Exception:
        print_info("{} 解析失败".format(item.get_info("sourceUrl")))
        return
    body = []
    for p in xml.xpath('//div[@class="TRS_Editor"]/p'):
        if p.text:
            body.append(p.text)
    date = source_date[1]
    update_info = {
        "date": date,
        "_id": generate_hash("{}{}".format(item.get_info("title"), date)),
        "source": source_date[0],
        "body": "\n".join(body),
        "effective": True
    }
    item.set_info(update_info)
def download(self):
    self.switchFrame()
    from download import Download
    d = Download(self.mainframe)
    d.pack(fill=BOTH, expand=True, padx=10, pady=5)
    self.root.wait_window(d)
    self.player()
def get_page_num(self):
    xml = Download(self._start_url).request()
    if xml is False:
        return 1
    js_func = xml.xpath('//div[@class="zx_ml_list_page"]/script/text()')[0]
    js_func = js_func.replace("createPageHTML(", "").replace(");", "")
    return int(js_func.split(",")[0])
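# Illustrative sketch of the string handling in get_page_num above, assuming the
# pagination script follows the common createPageHTML(total, current, prefix, suffix);
# form; the literal below is a made-up example, not captured output.
js_func = 'createPageHTML(23, 0, "index", "html");'
js_func = js_func.replace("createPageHTML(", "").replace(");", "")
print(int(js_func.split(",")[0]))  # -> 23, interpreted as the total page count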
def get_post(self, item):
    xml = Download(item.get_info("sourceUrl")).request()
    if xml is False:
        return
    try:
        source_date = xml.xpath('//p[@class="margin_top15 c999999 text_cencer"]')[0].text
    except Exception:
        print_info("{} 解析失败".format(item.get_info("sourceUrl")))
        return
    source_date = source_date.split(" ")
    body = []
    for p in xml.xpath('//div[@class="content-content"]/p'):
        if p.text:
            body.append(p.text)
    date = "{} {}".format(source_date[0].replace("时间:", ""), source_date[1])
    update_info = {
        "date": date,
        "_id": generate_hash("{}{}".format(item.get_info("title"), date)),
        "source": source_date[3].replace("来源:", ""),
        "body": "\n".join(body),
        "effective": True
    }
    item.set_info(update_info)
def __init__(self):
    self.download = Download()
    self.home = 'https://blog.csdn.net'
    self.catetories = []
    self.blog_user = []
    self.queue = Queue.Queue()
def addNewTasks(self):
    if self.lock.acquire(True):
        try:
            if self.running:
                # logging.debug("task complete! time for next task!")
                if isinstance(self.currentTask, Upload):
                    # logging.debug('we completed an upload')
                    # after uploading - we check for downloads
                    # but first we relax for 5 seconds
                    # it's important to relax - to give swift a few
                    # seconds to catch up
                    if not self.currentTask.hadWorkToDo:
                        # if you didn't have any work to do - take a break!
                        if not self._starting:
                            self.taskList.put(Sleep(self.updateInterval))
                    else:
                        # logging.info('there was work to do - no resting!')
                        pass
                    download = Download(self.objectStore, self.outputQueue)
                    self.taskList.put(download)
                    self._starting = False
                elif isinstance(self.currentTask, Download):
                    # logging.debug('we completed a download')
                    if not self.currentTask.hadWorkToDo:
                        # if you didn't have any work to do - take a break!
                        if not self._starting:
                            self.taskList.put(Sleep(self.updateInterval))
                    else:
                        # logging.info('there was work to do - no resting!')
                        pass
                    upload = Upload(self.objectStore, self.outputQueue)
                    self.taskList.put(upload)
                    self._starting = False
                elif isinstance(self.currentTask, Authenticate):
                    if self.currentTask.isAuthenticated:
                        # upload = Upload(self.objectStore,
                        #                 self.outputQueue)
                        # self.taskList.put(upload)
                        download = Download(self.objectStore, self.outputQueue)
                        self.taskList.put(download)
                    else:
                        sleep = Sleep(self.currentTask.retryWait)
                        # logging.info('failed to auth'
                        #              ' - sleeping for %r'
                        #              % self.currentTask.retryWait)
                        self.taskList.put(sleep)
                        self.taskList.put(self.currentTask)
                elif isinstance(self.currentTask, Sleep):
                    pass
                elif isinstance(self.currentTask, update.Update):
                    if self.currentTask.hadWorkToDo:
                        self.scheduleRestart()
                else:
                    logging.warn('unhandled task completion!')
        finally:
            self.lock.release()
def listening_test():
    data = json.loads(request.get_data(as_text=True))
    url_id = data['url_id']
    name = data['name']
    author = data['author']
    source = data['source']
    d = Download(name, author, url_id, source)
    url = d.listening_test()
    ret = {'url': url}
    return jsonify(ret)
def test_get_next(self):
    dl = Download(self._config, self._db)
    result = dl._get_next("http://example.com/",
                          "<a href='http://example.com/next' class='nav-next'>Next Comic</a>")
    self.assertEqual(result, "http://example.com/next")
    result = dl._get_next("http://example.com/",
                          "<a href='/next' class='nav-next'>Next Comic</a>")
    self.assertEqual(result, "http://example.com/next")
    result = dl._get_next("http://example.com/",
                          "<a href='next' class='nav-next'>Next Comic</a>")
    self.assertEqual(result, "http://example.com/next")
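# The three assertions above all expect relative hrefs to resolve against the page URL.
# A minimal sketch of that behaviour using urllib.parse.urljoin; this is an assumption
# about what _get_next does, not its actual implementation.
from urllib.parse import urljoin

base = "http://example.com/"
for href in ("http://example.com/next", "/next", "next"):
    print(urljoin(base, href))  # each resolves to http://example.com/next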
def process_queue():
    while True:
        # keep processing URLs until the list is exhausted
        try:
            url = urllist.pop()
            print('url', url)
            D = Download()
            D.Downloader(url)
        except KeyError:
            # currently no urls to process
            break
def get_page_num(self):
    xml = Download(self._start_url).request()
    if xml is False:
        return 1
    last_url = xml.xpath('//a[@class="last"]')[0].xpath("@href")[0]
    html_names = re.findall(pattern=r"index_[\d]*.html", string=last_url)
    if len(html_names) >= 1:
        pages_num = int(html_names[0].replace("index_", "").replace(".html", ""))
        return pages_num
    else:
        return 1
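# Quick illustration of the pagination parsing above; the href of the "last" link is
# assumed to end in index_<n>.html, and the URL below is made up.
import re

last_url = "http://example.gov.cn/news/index_12.html"
html_names = re.findall(r"index_[\d]*.html", last_url)
print(int(html_names[0].replace("index_", "").replace(".html", "")))  # -> 12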
def download():
    data = json.loads(request.get_data(as_text=True))
    url_id = data['url_id']
    name = data['name']
    author = data['author']
    source = data['source']
    d = Download(name, author, url_id, source)
    is_ok = d.download()
    if is_ok:
        ret = {'state': 1}
    else:
        ret = {'state': 0}
    return jsonify(ret)
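# Client-side sketch for the route above; the host, port, and route path "/download"
# are assumptions, as are the field values.
import requests

payload = {"url_id": "123", "name": "some-song", "author": "some-artist", "source": "web"}
resp = requests.post("http://localhost:5000/download", json=payload)
print(resp.json())  # expected: {'state': 1} on success, {'state': 0} otherwise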
def fetch_data(self):
    url = City.url_prefix + self._state + '/' + self._city
    dest = City.file_prefix + self._abbrv + '/City/' + self._city
    print 'downloading state (%s), city (%s), url (%s), dest (%s)' % (self._state, self._city, url, dest)
    dst_file = File(dest)
    if dst_file.exists() is True:
        print '........Data for %s, %s already present' % (self._state, self._city)
        return
    download = Download(url, dest)
    download.download_cookie()
def main():
    # print 'Running.'
    # url = 'https://blog.csdn.net/GitChat'
    # download = Download()
    # articledb = ArticleDB(DB(*config))
    # spider = Spider(url, download, articledb)
    # spider.start()
    # print 'Done.'

    # index
    url = 'https://blog.csdn.net/'
    download = Download()
    html = download.down_html(url, save=True)
    print html
def load_from_xml(self):
    """Loads download objects from the xml file."""
    self.download_file_path = os.path.join(self.config.base_dir, DOWNLOADS_FILE)
    if not os.path.exists(self.download_file_path):
        self.__create_xml()
    else:
        self.tree = ET.parse(self.download_file_path)
        downloads_element = self.tree.getroot()
        for download_element in list(downloads_element):
            uri = download_element.findtext("uri")
            path = download_element.findtext("path")
            file_name = download_element.findtext("filename")
            total_size = download_element.findtext("size")
            status = download_element.findtext("status")
            date_started = download_element.findtext("date_started")
            date_completed = download_element.findtext("date_completed")
            download = Download(uri, path, date_started, date_completed)
            download.file_name = file_name
            if total_size:
                download.total_size = int(total_size)
            if status:
                download.status = int(status)
            if download.status == COMPLETED:
                download.percent_complete = 100
            else:
                if download.total_size != 0:
                    download.percent_complete = 100 * download.current_size / download.total_size
                else:
                    download.percent_complete = 0
            self.__append_download(download)
class Spider_Music():
    def __init__(self):
        self.download = Download()
        self.url_manager = Url_Manager()
        self.html_parser = Html_Parser()
        self.save = Save()
        self.set_color = Set_Color()

    def craw(self, url):
        self.url_manager.addurl({'url': url, 'name': 'temp'})
        while self.url_manager.checknewurllength > 0:
            newurl = self.url_manager.geturl()
            if self.save.checkfile(newurl['name']):
                self.set_color.printDarkRed("{} 已下载!\n".format(newurl['name']))
                continue
            print("开始下载 {} {}".format(newurl['name'], newurl['url']))
            htmlcontent = self.download.download(newurl['url'])
            newurls, result = self.html_parser.parser(htmlcontent)
            self.url_manager.addurls(newurls)
            self.save.save(result, newurl['name'])
            print("下载完成 {} ".format(newurl['name']))
        print("共下载{}首歌曲".format(self.save.count))

    def main(self):
        self.craw('https://music.163.com/#/playlist?id=2492536378')
def download_OK_callback(self):
    download_savepath = self.v_download_savepath.get()
    download_URL = self.v_download_URL.get()
    download_rename = self.v_download_rename.get()
    download_rename = download_rename.replace(" ", "")
    if download_URL == "":  # the URL and the save path must not be empty
        self.t_download_process.configure(state="normal")
        self.t_download_process.insert(END, "请输入视频链接!\n")
        self.t_download_process.see(END)
        self.t_download_process.configure(state="disabled")
    elif download_savepath == "":
        self.t_download_process.configure(state="normal")
        self.t_download_process.insert(END, "请选择视频保存路径!\n")
        self.t_download_process.see(END)
        self.t_download_process.configure(state="disabled")
    elif download_rename == "":
        self.t_download_process.configure(state="normal")
        self.t_download_process.insert(END, "请给视频命名!\n")
        self.t_download_process.see(END)
        self.t_download_process.configure(state="disabled")
    else:
        # self.t_download_process.insert(END, "连接YouTube URL中...\n")
        # start the download thread
        self.ent_download_rename.delete(0, END)
        D = Download()
        th_download = threading.Thread(
            target=D.start,
            args=(download_URL, download_savepath, download_rename, self.t_download_process,))
        th_download.setDaemon(True)  # daemon thread exits when the main thread exits
        th_download.start()
def link_crawler(seed_url, link_regex=None, max_depth=1, user_agent='wawp',
                 proxies=None, num_retries=1, cache=None):
    crawl_queue = [seed_url]     # list of URLs still to crawl
    seen = {seed_url: 0}         # tracks the crawl depth of each URL
    num_urls = 0                 # total number of pages downloaded
    rp = get_robots(seed_url)
    D = Download(user_agent=user_agent, proxies=proxies,
                 num_retries=num_retries, cache=cache)
    while crawl_queue:
        url = crawl_queue.pop()  # take the last URL from the list
        depth = seen[url]        # depth of the current URL
        if rp.can_fetch(user_agent, url):
            html = D(url)
            links = []
            if depth != max_depth:
                if link_regex:
                    # collect matching links
                    links.extend(link for link in get_links(html, link_regex)
                                 if re.search(link_regex, link))
                for link in links:
                    link = normalize(seed_url, link)  # resolve into an absolute URL
                    if link not in seen:
                        seen[link] = depth + 1
                        if same_domain(seed_url, link):
                            crawl_queue.append(link)
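# Hedged usage sketch for link_crawler; the seed URL and link regex are placeholders,
# and get_robots, get_links, normalize and same_domain are assumed to live in the same module.
if __name__ == '__main__':
    link_crawler('http://example.webscraping.com', '/(index|view)/',
                 max_depth=2, user_agent='wawp', num_retries=1)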
def preprocessorMain(self):
    self.removeTargetColumn()
    while (1):
        print("\nTasks (Preprocessing)\n")
        for task in self.tasks:
            print(task)
        while (1):
            try:
                choice = int(input("\nWhat do you want to do? [enter -1 to exit]: "))
            except ValueError:
                print("Integer Value required. Try again.....")
                continue
            break
        if choice == -1:
            exit()
        elif choice == 1:
            DataDescription(self.data).describe()
        elif choice == 2:
            self.data = Imputation(self.data).imputer()
        elif choice == 3:
            self.data = Categorical(self.data).categoricalMain()
        elif choice == 4:
            self.data = FeatureScaling(self.data).scaling()
        elif choice == 5:
            Download(self.data).download()
        else:
            print("\nWrong choice!! Try again...")
def add_download(self, turl):
    if self.get_download(url=turl) is None:
        # use the last URL segment as the file name
        self.downloads.append(Download(
            turl,
            self.config.config["DownloadManager"]["DownloadLocation"]
            + "\\" + turl.split('/')[-1]))
    else:
        raise Exception("Download url is already being downloaded")
    self.updatefunc()
def fetch_cities_in_state(self):
    for alpha in self._alpha:
        url = State.url_prefix + self._abbrv + "/" + alpha
        dest = State.file_prefix + self._abbrv + "/" + alpha
        print "downloading state (%s), url (%s), state (%s)" % (self._state, url, dest)
        dir = Dir(dest)
        if dir.exists() is False:
            dir.create_if_needed()
        # check if data is present
        data_file = File(dest + "/file")
        if data_file.exists() is True:
            print "data present for state %s, %s" % (self._state, alpha)
            continue
        download = Download(url, dest + "/file")
        download.download()
def download_song(self, song_id, path='.', id=0, ids=0):
    # Cannot find or download this song
    self.get_real_song_data(song_id)
    mp3Name = "{songName}--{author}.{format}".format(
        songName=self.__SONG_NAME,
        author=self.__SONG_AUTHOR,
        format=self.__SONG_FORMAT,
    ).strip()
    download_flag = (0, 0, 0)
    if not self.__SONG_REAL_URL:
        print("No valid Url.")
    else:
        download = Download()
        download_flag = download.download(self.__SONG_REAL_URL, mp3Name, path, id, ids)
    return download_flag
def get_post(self, item):
    xml = Download(item.get_info("sourceUrl")).request()
    if xml is False:
        return
    bodys = []
    try:
        lis = xml.xpath('//div[@class="check_content_points"]/ul/li')
        if len(lis) > 1:
            for li in lis:
                if li.find("span").tail:
                    bodys.append(li.find("span").tail)
        else:
            bodys.append(lis[0].text)
    except Exception:
        print_info("解析错误:{}".format(item.get_info("sourceUrl")))
        return
    item.set_info({"body": "\n".join(bodys)})
def execute_download(table, queue, logger, is_test=False):
    '''
    Core download function.
    params: table -> an instance of class Table
    params: queue -> book task queue
    params: logger -> Log().Logger
    '''
    data = queue.get(block=True, timeout=30)
    category = data.get('category')
    book = data.get('book')
    is_finish = data.get('is_finish')
    id = data.get('id')
    item = data.get('item')
    url = data.get('url')
    if is_finish == 1:
        folder = u'完结'
    else:
        folder = u'连载'
    filefolder = u'%s/%s/%s/%s' % (DOWNLOAD_FOLDER, folder, category, book)
    if not os.path.exists(filefolder):
        os.makedirs(filefolder)
        message = u'makedirs %s' % (filefolder)
        logger.info(message)
    filename = u'%d-%s.txt' % (id, item)
    filepath = u'%s/%s' % (filefolder, filename)
    download = Download(url=url, logger=logger, filepath=filepath)
    try:
        flag = download.download()
    except Exception, e:
        message = u'caught Exception: %s when executing download, putting data %s back into the queue' % (
            e, json.dumps(data, ensure_ascii=False))
        table.logger.error(message)
        queue.put(data)
        flag = False
def main(): print("Downloading video...") url = sys.argv[1] dl = Download(url) videoFile = dl.getVideo() # videoFile = "testVideo.mp4" print("Detecting shot boundaries...") bd = BoundaryDetector(videoFile) shotBounds = bd.getShotBoundaries() print("{0} shot boundaries found...".format(np.sum(shotBounds))) # shotBounds = np.load("bounds.npy") print("Extracting representative keyframes...") kfExtractor = KeyframeExtractor(videoFile) frames = kfExtractor.extractKeyframes(shotBounds) for frame in frames: cv2.imshow("Keyframes", frame) cv2.waitKey()
def add_download(self, uri, path=None, headers={}):
    """Constructs a new download object and adds it to the list and xml tree."""
    if path is None:
        path = self.config.default_folder
    download = Download(uri, path, headers)
    self.__append_download(download)
    self.__add_download_to_xml(download)
    return download
def parse_downloads(self, downloads):
    downloads_data = []
    for download_json in downloads:
        url = download_json.get("download_url", "")
        if not url:
            url = download_json['url']
        downloads_data.append(Download(download_json, url, self))
    return downloads_data
def Download_Torrent_File(self):
    choose = int(input("select number: "))
    for i in self.Informations(page=self.pg):
        if choose == int(i[0]):
            os.system("clear")
            print(f"[+] {i[1]}")
            print(f"[+] getting magnet")
            content = self.Contents(f'https://1337x.to{i[5]}')
            magnet_link = content.find("div", "no-top-radius").a["href"]
            Download(magnet_link)
            print("\n\t download started .. \n")
def main():
    global _username, _domain, _classes, prevId
    init()
    print(Fore.WHITE + Style.BRIGHT + printBanner(), end='')
    with open(baseWritePath + prevId, 'r') as f:
        first = json.load(f)
    nextId = ''.join(
        random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
    first['NextId'] = nextId
    with open(baseWritePath + prevId, 'w') as f:
        json.dump(first, f)
    prevId = nextId
    content = waitAndReadFile(baseReadPath + prevId)
    print("[+] Connection successfully established!")
    time.sleep(3)
    print("[+] Patching ETW...")
    time.sleep(7)
    print("[+] Manually loading kernel32.dll to avoid API hooks.")
    time.sleep(7)
    print("[+] Manually loading advapi32.dll to avoid API hooks.")
    time.sleep(5)
    print("[+] Patching AMSI...")
    time.sleep(5)
    print("[+] H4ck th3 Pl4n3t!")
    userAndDomain = content['Output']
    userAndDomain = userAndDomain.split("\\")
    _domain = userAndDomain[0]
    _username = userAndDomain[1]
    _classes.append(Download())
    _classes.append(Base64encode())
    _classes.append(Base64decode())
    _classes.append(Compile())
    _classes.append(Inject())
    _classes.append(Powershell())
    _classes.append(Send())
    _classes.append(Impersonate())
    _classes.append(Exfiltrate())
    _classes.append(Runas())
    _classes.append(Shell())
    mainConsole()
    deinit()
def __init__(self, log, base_path):
    self.log = log
    self.config_path = os.path.join(base_path, 'config.ini')
    self.data = ConfigRead(self.config_path).read()  # parse the config file
    self.uncompressed_paths = Download(self.log, self.data).download()  # download and unpack the installation packages
    self.excel_vol_titles = self.get_excel_titles()  # Excel headers: the kinds of data to collect
    self.package_names = self.data['versions'].split(
        ',') if ',' in self.data['versions'] else [self.data['versions']]
def _install(self, target):
    self.path = ""
    if target.platform == "Windows":
        architecture_string = ""
        if target.architecture == "64":
            architecture_string = "-win64-x64"
        else:
            architecture_string = "-win32-x86"
        source_path = "CMake/cmake-3.12.3" + architecture_string + ".zip"
        zip_ref = zipfile.ZipFile(source_path, "r")
        self.path = "Build/cmake-3.12.3" + architecture_string + \
            "/bin/cmake.exe"
        # TODO : the path we delete here doesn't seem right
        shutil.rmtree(self.path, ignore_errors=True)
        zip_ref.extractall("Build")
        zip_ref.close()
    elif target.platform == "Linux":
        download_url = "https://github.com/CodeSmithyIDE/CMake/archive/master.zip"
        download = Download("CMake", download_url, "Build")
        download.download(None)
        download.unzip()
        previous_working_dir = os.getcwd()
        os.chdir("Build/CMake")
        try:
            try:
                subprocess.check_call(["chmod", "0774", "bootstrap"])
            except subprocess.CalledProcessError:
                raise RuntimeError("chmod 0774 bootstrap failed.")
            try:
                subprocess.check_call("./bootstrap")
            except subprocess.CalledProcessError:
                raise RuntimeError("./bootstrap failed.")
            GNUmake().compile("Makefile", None, None)
            self.path = "Build/CMake/bin/cmake"
        finally:
            os.chdir(previous_working_dir)
    else:
        raise RuntimeError("Unsupported platform: " + target.platform)
def start_download(self, device, request, done):
    """
    This is called to request downloading a specified image into
    the standby partition of a device based on a NBI call.

    :param device: A Voltha.Device object.
    :param request: A Voltha.ImageDownload object.
    :param done: (Deferred) Deferred to fire when done
    :return: (Deferred) Shall be fired to acknowledge the download.
    """
    log.info('image_download', request=request)
    try:
        if request.name in self._downloads:
            raise Exception("Download request with name '{}' already exists".
                            format(request.name))
        try:
            download = Download.create(self, request, self._download_protocols)
        except Exception:
            request.additional_info = 'Download request creation failed due to exception'
            raise
        try:
            self._downloads[download.name] = download
            self._update_download_status(request, download)
            done.callback('started')
            return done
        except Exception:
            request.additional_info = 'Download request startup failed due to exception'
            del self._downloads[download.name]
            download.cancel_download(request)
            raise
    except Exception as e:
        self.log.exception('create', e=e)
        request.reason = ImageDownload.UNKNOWN_ERROR
        request.state = ImageDownload.DOWNLOAD_FAILED
        if not request.additional_info:
            request.additional_info = e.message
        self.adapter_agent.update_image_download(request)
        # restore admin state to enabled
        device.admin_state = AdminState.ENABLED
        self.adapter_agent.update_device(device)
        raise
        os.makedirs(comicConfig['folder'])
        if opts.static:
            logging.debug("Doing a static download")
            # Setup static env
            comicConfig['static'] = {}
            comicConfig['static']['template'] = config['static']['template']
            comicConfig['static']['htmlDir'] = os.path.join(comicConfig['folder'], "html")
            if not os.path.exists(comicConfig['static']['htmlDir']):
                stylePath = os.path.join(comicConfig['static']['htmlDir'], "styles")
                os.makedirs(stylePath)
                shutil.copy(os.path.join(config['directories']['html'], "lib", "bootstrap",
                                         "bootstrap.min.css"), stylePath)
                shutil.copy(os.path.join(config['directories']['html'], "styles",
                                         "comics.css"), stylePath)
            # Do the download
            dl = Download(comicConfig, config['database'])
            dl.create_static(config['static']['number'], config['static']['template'])
        else:
            logging.debug("Adding files to database")
            dl = Download(comicConfig, config['database'])
            dl.crawl_comic()
    except IOError as e:
        logging.exception("Could not read from config file %s: %s. Skipping.", file, e)
        continue
    except ValueError as e:
        logging.exception("Could not parse JSON in %s: %s. Skipping.", file, e)
        continue
def save_as_json(nodes, path):
    """Save a list of nodes to file as json."""
    json_object = json.dumps([node.__dict__ for node in nodes],
                             sort_keys=True, indent=4, separators=(',', ': '))
    Download.save_content(json_object, path)
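# Usage sketch for save_as_json, assuming each node is a plain object whose attributes
# are JSON-serializable; the Node class here is hypothetical.
class Node:
    def __init__(self, title, url):
        self.title = title
        self.url = url

save_as_json([Node("Example", "http://example.com/")], "nodes.json")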
from flask import send_from_directory

app = flask.Flask(__name__)
app.secret_key = settings.secret_key

app.add_url_rule('/', view_func=Main.as_view('main'), methods=['GET', 'POST'])
app.add_url_rule('/login/', view_func=Login.as_view('login'), methods=['GET', 'POST'])
app.add_url_rule('/download/', view_func=Download.as_view('download'), methods=['GET', 'POST'])
app.add_url_rule('/score/', view_func=Score.as_view('score'), methods=['GET'])
app.add_url_rule('/solve/', view_func=Solve.as_view('solve'), methods=['GET', 'POST'])
app.add_url_rule('/<page>/', view_func=Main.as_view('page'), methods=['GET'])

import os
from archive import Archive
from download import Download
import os
import re


def print_(str):
    print str


print 'Loading...'
if not os.path.exists('./DDD'):
    os.makedirs('./DDD')
d = Download()
config = open('./config.txt')
blogger = []
while True:
    line = config.readline()
    if not line:
        break
    blogger.append(line[:-1])
for i in blogger:
    if not os.path.exists('./DDD/' + i):
        os.makedirs('./DDD/' + i)
    all_ids = os.listdir('./DDD/' + i)
    a = Archive('http://' + i + '.diandian.com/archive')
    d.reg_callback(print_)
    archives = a.getAllArchive()
    for j in archives:
        for k in archives[j]:
            print k
            try:
                if re.search(r'post\/.*\/(.*)', k).group(1) not in all_ids:
def download(url):
    d = Download()
    return (d.time(url), whoami, url)
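# Usage sketch, assuming Download.time returns the elapsed fetch time in seconds and
# whoami identifies the current worker; both are assumptions based on the names alone.
for elapsed, worker, url in map(download, ["http://example.com/a", "http://example.com/b"]):
    print("%s fetched %s in %.2f s" % (worker, url, elapsed))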
from download import Download
from parse import Parse
from item import Item
import xml.etree.ElementTree as ET

url = 'http://showrss.info/feeds/1166.rss'
#path = '1166.rss'

src = Download.read_content('test.xml')
#Download.save_file(url, 'test2.xml')
#print(src)

l = Parse.parse_content(src)
for x in l:
    print(x)

#root = ET.fromstring(src)
#print([x for x in [child for child in root.iter('item')]])
    if posted < start_date:
        break
    f = urllib.urlopen(url + page[0])
    s = f.read()
    f.close()
    flv = re.search('<input type="text" name="filepath" value="([^"]+)"', s)
    if flv:
        flvurl = flv.group(1)
        # download the file
        file_path = (flvurl.rsplit('/', 1))[1]
        print page[1] + ' ' + flvurl
        if options.dry_run:
            continue
        flvurl = urllib.quote_plus(flvurl, safe=':/')
        d = Download(flvurl, options.destination)
        d.start()
        while 1:
            try:
                progress = d.progress['percent']
                print("%.2f percent | %.2f of %.2f" % (
                    progress, d.progress['downloaded'], d.progress['total']))
                if progress == 100:
                    print("")
                    print("Download complete: %s" % d.filename)
                    break
                time.sleep(10)
            # tell thread to terminate on keyboard interrupt,
            # otherwise the process has to be killed manually
            except KeyboardInterrupt: