def addNewTasks(self):
    if self.lock.acquire(True):
        try:
            if self.running:
                # logging.debug("task complete! time for next task!")
                if isinstance(self.currentTask, Upload):
                    # logging.debug('we completed an upload')
                    # After uploading we check for downloads,
                    # but first we relax for 5 seconds.
                    # It's important to relax - to give swift a few
                    # seconds to catch up.
                    if not self.currentTask.hadWorkToDo:
                        # If there was no work to do, take a break!
                        if not self._starting:
                            self.taskList.put(Sleep(self.updateInterval))
                    else:
                        # logging.info('there was work to do - no resting!')
                        pass
                    download = Download(self.objectStore, self.outputQueue)
                    self.taskList.put(download)
                    self._starting = False
                elif isinstance(self.currentTask, Download):
                    # logging.debug('we completed a download')
                    if not self.currentTask.hadWorkToDo:
                        # If there was no work to do, take a break!
                        if not self._starting:
                            self.taskList.put(Sleep(self.updateInterval))
                    else:
                        # logging.info('there was work to do - no resting!')
                        pass
                    upload = Upload(self.objectStore, self.outputQueue)
                    self.taskList.put(upload)
                    self._starting = False
                elif isinstance(self.currentTask, Authenticate):
                    if self.currentTask.isAuthenticated:
                        # upload = Upload(self.objectStore,
                        #                 self.outputQueue)
                        # self.taskList.put(upload)
                        download = Download(self.objectStore,
                                            self.outputQueue)
                        self.taskList.put(download)
                    else:
                        sleep = Sleep(self.currentTask.retryWait)
                        # logging.info('failed to auth'
                        #              ' - sleeping for %r'
                        #              % self.currentTask.retryWait)
                        self.taskList.put(sleep)
                        self.taskList.put(self.currentTask)
                elif isinstance(self.currentTask, Sleep):
                    pass
                elif isinstance(self.currentTask, update.Update):
                    if self.currentTask.hadWorkToDo:
                        self.scheduleRestart()
                else:
                    logging.warning('unhandled task completion!')
        finally:
            self.lock.release()
def main():
    logging.basicConfig(format=u'%(levelname)-8s [%(asctime)s] %(message)s',
                        level=logging.INFO)
    db = Database()
    pos = db.get_position()
    d = Download(pos)
    if not d.auth(Config.USER_NAME, Config.USER_PASS):
        logging.fatal("No access to SDO, exit...")
        exit(0)
    logging.info("Starting from: " + str(pos))
    while True:
        res = d.load_user()
        if res is not Msg.critical_http and res is not Msg.critical_parse:
            if res is not Msg.err_not_found:
                user = d.pop_user()
                logging.info("User " + user.first_name + " " +
                             user.last_name + " added")
                db.insert_user(user)
            else:
                logging.info("Wrong user id, skipped...")
            db.update_position(d.next())
            sleep(0.3)
        else:
            logging.fatal("Some fatal error, exit...")
            break
    logging.info("--- APP END ---")
def preprocessorMain(self):
    self.removeTargetColumn()
    while True:
        print("\nTasks (Preprocessing)\n")
        for task in self.tasks:
            print(task)
        while True:
            try:
                choice = int(
                    input("\nWhat do you want to do? [enter -1 to exit]: "))
            except ValueError:
                print("Integer Value required. Try again.....")
                continue
            break
        if choice == -1:
            exit()
        elif choice == 1:
            DataDescription(self.data).describe()
        elif choice == 2:
            self.data = Imputation(self.data).imputer()
        elif choice == 3:
            self.data = Categorical(self.data).categoricalMain()
        elif choice == 4:
            self.data = FeatureScaling(self.data).scaling()
        elif choice == 5:
            Download(self.data).download()
        else:
            print("\nWrong choice!! Try again...")
def __init__(self): """ Download images and saves them into the folder images shows them one by one in a tkinter window and then delete the folder after all images has been shown. """ self.window = tk.Tk() pad = 3 self.window.geometry("{0}x{1}+0+0".format( self.window.winfo_screenwidth() - pad, self.window.winfo_screenheight() - pad)) #Sets the window fullscreen self.label = tk.Label() self.label.pack() self.Reddit = Reddit() self.Download = Download() links = self.Reddit.reddit() for link in links: self.Download.download(link) files = os.listdir( "images") # Creates an array with all filenames in images map counter = 0 for file in files: if counter != 0: time.sleep(10) photo = ImageTk.PhotoImage(Image.open("images/{}".format(file))) self.label.configure(image=photo) self.label.image = photo self.window.update_idletasks() counter = 1 self.Download.delete_folder()
def download(self):
    self.switchFrame()
    from download import Download
    d = Download(self.mainframe)
    d.pack(fill=BOTH, expand=True, padx=10, pady=5)
    self.root.wait_window(d)
    self.player()
def get_post(self, item):
    xml = Download(item.get_info("sourceUrl")).request()
    if xml is False:
        return
    try:
        source_date = xml.xpath(
            '//p[@class="margin_top15 c999999 text_cencer"]')[0].text
    except Exception:
        print_info("{} failed to parse".format(item.get_info("sourceUrl")))
        return
    source_date = source_date.split(" ")
    body = []
    for p in xml.xpath('//div[@class="content-content"]/p'):
        if p.text:
            body.append(p.text)
    # The "时间:" (time) and "来源:" (source) prefixes come from the page
    # text and are stripped before storing.
    date = "{} {}".format(source_date[0].replace("时间:", ""), source_date[1])
    update_info = {
        "date": date,
        "_id": generate_hash("{}{}".format(item.get_info("title"), date)),
        "source": source_date[3].replace("来源:", ""),
        "body": "\n".join(body),
        "effective": True
    }
    item.set_info(update_info)
def link_crawler(seed_url, link_regex=None, max_depth=1, user_agent='wawp',
                 proxies=None, num_retries=1, cache=None):
    crawl_queue = [seed_url]   # list of URLs still to crawl
    seen = {seed_url: 0}       # depth at which each URL was discovered
    num_urls = 0               # total number of pages downloaded
    rp = get_robots(seed_url)
    D = Download(user_agent=user_agent, proxies=proxies,
                 num_retries=num_retries, cache=cache)
    while crawl_queue:
        url = crawl_queue.pop()   # take the last URL in the list
        depth = seen[url]         # look up this URL's crawl depth
        if rp.can_fetch(user_agent, url):
            html = D(url)
            links = []
            if depth != max_depth:
                if link_regex:
                    links.extend(link for link in get_links(html, link_regex)
                                 if re.search(link_regex, link))
                for link in links:
                    link = normalize(seed_url, link)  # resolve to an absolute URL
                    if link not in seen:
                        seen[link] = depth + 1
                        if same_domain(seed_url, link):
                            crawl_queue.append(link)
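# A minimal usage sketch for link_crawler above. The seed URL and regex are
# illustrative placeholders (not from the original project), and the helpers
# the function relies on (Download, get_robots, get_links, normalize,
# same_domain) are assumed to be importable from the surrounding module.
if __name__ == '__main__':
    link_crawler('http://example.webscraping.com',
                 link_regex=r'/(index|view)',
                 max_depth=2,
                 user_agent='wawp')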
def get_post(self, item):
    if item.get_info("sourceUrl").split(".")[-1] == "pdf":
        return
    xml = Download(item.get_info("sourceUrl")).request()
    if xml is False:
        return
    try:
        source_date = xml.xpath(
            '//div[@class="xxxq_text_tit"][1]/h6/span[2]')[0]
        # Agency name plus the publication date with its "发布日期:"
        # (publication date) prefix stripped.
        source_date = ["深圳市卫生健康委员会",
                       source_date.text.replace("发布日期:", "")]
    except Exception:
        print_info("{} failed to parse".format(item.get_info("sourceUrl")))
        return
    body = []
    for p in xml.xpath('//div[@class="TRS_Editor"]/p'):
        if p.text:
            body.append(p.text)
    date = source_date[1]
    update_info = {
        "date": date,
        "_id": generate_hash("{}{}".format(item.get_info("title"), date)),
        "source": source_date[0],
        "body": "\n".join(body),
        "effective": True
    }
    item.set_info(update_info)
def get_post_list(self, url, items):
    xml = Download(url).request()
    if xml is False:
        return
    lis = xml.xpath('//div[@class="section list"][1]/ul/li')
    for li in lis:
        a = li.find("a")
        span = li.find("span")
        if self.url_repeat(a.get("href")) is False:
            item = GDWJWItem()
            item.set_info({
                "title": a.get("title"),
                "sourceUrl": a.get("href"),
                "_id": generate_hash("{}{}".format(a.get("title"), span.text)),
                "agency": "广东省卫健委",
                "date": span.text,
                "effective": True
            })
            items.append(item)
def get_page_num(self):
    xml = Download(self._start_url).request()
    if xml is False:
        return 1
    # Extract the page count from the first argument of the
    # createPageHTML(...) call embedded in the pagination script.
    js_func = xml.xpath('//div[@class="zx_ml_list_page"]/script/text()')[0]
    js_func = js_func.replace("createPageHTML(", "").replace(");", "")
    return int(js_func.split(",")[0])
def download_OK_callback(self):
    download_savepath = self.v_download_savepath.get()
    download_URL = self.v_download_URL.get()
    download_rename = self.v_download_rename.get()
    download_rename = download_rename.replace(" ", "")
    if download_URL == "":  # neither the URL nor the save path may be empty
        self.t_download_process.configure(state="normal")
        self.t_download_process.insert(END, "Please enter a video URL!\n")
        self.t_download_process.see(END)
        self.t_download_process.configure(state="disabled")
    elif download_savepath == "":
        self.t_download_process.configure(state="normal")
        self.t_download_process.insert(END, "Please choose a save path for the video!\n")
        self.t_download_process.see(END)
        self.t_download_process.configure(state="disabled")
    elif download_rename == "":
        self.t_download_process.configure(state="normal")
        self.t_download_process.insert(END, "Please give the video a name!\n")
        self.t_download_process.see(END)
        self.t_download_process.configure(state="disabled")
    else:
        # self.t_download_process.insert(END, "Connecting to YouTube URL...\n")
        # Start the download thread.
        self.ent_download_rename.delete(0, END)
        D = Download()
        th_download = threading.Thread(
            target=D.start,
            args=(download_URL, download_savepath, download_rename,
                  self.t_download_process))
        th_download.daemon = True  # child thread exits with the main thread
        th_download.start()
def load_from_xml(self):
    """Loads download objects from the xml file."""
    self.download_file_path = os.path.join(self.config.base_dir, DOWNLOADS_FILE)
    if not os.path.exists(self.download_file_path):
        self.__create_xml()
    else:
        self.tree = ET.parse(self.download_file_path)
        downloads_element = self.tree.getroot()
        for download_element in list(downloads_element):
            uri = download_element.findtext("uri")
            path = download_element.findtext("path")
            file_name = download_element.findtext("filename")
            total_size = download_element.findtext("size")
            status = download_element.findtext("status")
            date_started = download_element.findtext("date_started")
            date_completed = download_element.findtext("date_completed")
            download = Download(uri, path, date_started, date_completed)
            download.file_name = file_name
            if total_size:
                download.total_size = int(total_size)
            if status:
                download.status = int(status)
            if download.status == COMPLETED:
                download.percent_complete = 100
            else:
                if download.total_size != 0:
                    download.percent_complete = \
                        100 * download.current_size / download.total_size
                else:
                    download.percent_complete = 0
            self.__append_download(download)
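# For reference, the XML layout load_from_xml expects looks roughly like the
# sketch below. The child tag names are taken from the findtext calls above;
# the root/entry element names and the values are illustrative assumptions.
#
# <downloads>
#   <download>
#     <uri>http://example.com/file.zip</uri>
#     <path>/home/user/Downloads</path>
#     <filename>file.zip</filename>
#     <size>1048576</size>
#     <status>2</status>
#     <date_started>2021-01-01 12:00</date_started>
#     <date_completed>2021-01-01 12:05</date_completed>
#   </download>
# </downloads>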
def __init__(self):
    self.download = Download()
    self.home = 'https://blog.csdn.net'
    self.catetories = []
    self.blog_user = []
    self.queue = Queue.Queue()
def add_download(self, turl):
    if self.get_download(url=turl) is None:
        # Save under the configured download location, named after the
        # last path segment of the URL.
        self.downloads.append(Download(
            turl,
            self.config.config["DownloadManager"]["DownloadLocation"]
            + "\\" + turl.split('/')[-1]))
    else:
        raise Exception("Download url is already being downloaded")
    self.updatefunc()
def btn_start_clicked(self):
    self.progressbar.setValue(0)
    self.data = []
    for row in range(0, self.tw_monitor.rowCount()):
        self.coloum_data = []
        if self.tw_monitor.item(row, 6).text() == "False":
            self.coloum_data.append(row)
            for column in range(0, self.tw_monitor.columnCount() - 1):
                self.coloum_data.append(self.tw_monitor.item(row, column).text())
            self.data.append(self.coloum_data)
    print(self.data)
    if self.data:
        self.progress = 0
        for row in self.data:
            dl = Download()
            dl.link = row[1]
            if row[2] == "mp3":
                dl.format = "bestaudio/best"
            elif row[2] == "mp4":
                dl.format = "best/best"
            dl.output_path = row[3]
            if row[4] == "":
                dl.is_custom_name = False
                dl.custom_name = ""
            else:
                dl.is_custom_name = True
                dl.custom_name = row[4]
            dl.thumbnail = row[5] == "True"
            dl.subtitle = row[6] == "True"
            try:
                dl.download()
                self.tw_monitor.takeItem(row[0], 6)
            except Exception:
                self.tw_monitor.setItem(row[0], 6, QTableWidgetItem("Error"))
            else:
                self.tw_monitor.setItem(row[0], 6, QTableWidgetItem("True"))
            print(int(100 * (self.progress / len(self.data))))
            self.progressbar.setValue(int(100 * (self.progress / len(self.data))))
            self.progress += 1
    self.progressbar.setValue(100)
def add_download(self, uri, path=None, headers=None):
    """Constructs a new download object and adds it to the list and xml tree."""
    if headers is None:  # avoid a shared mutable default argument
        headers = {}
    if path is None:
        path = self.config.default_folder
    download = Download(uri, path, headers)
    self.__append_download(download)
    self.__add_download_to_xml(download)
    return download
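# A hedged usage sketch for add_download above. The manager class name
# (DownloadManager), its config argument, and the URL are hypothetical
# stand-ins; only the add_download signature comes from the code above.
#
# manager = DownloadManager(config)
# dl = manager.add_download("http://example.com/file.iso",
#                           headers={"User-Agent": "my-downloader"})
# print(dl.file_name, dl.status)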
def listening_test():
    data = json.loads(request.get_data(as_text=True))
    url_id = data['url_id']
    name = data['name']
    author = data['author']
    source = data['source']
    d = Download(name, author, url_id, source)
    url = d.listening_test()
    ret = {'url': url}
    return jsonify(ret)
def parse_downloads(self, downloads):
    downloads_data = []
    for download_json in downloads:
        url = download_json.get("download_url", "")
        if not url:
            url = download_json['url']
        downloads_data.append(Download(download_json, url, self))
    return downloads_data
def __init__(self, log, base_path):
    self.log = log
    self.config_path = os.path.join(base_path, 'config.ini')
    self.data = ConfigRead(self.config_path).read()  # parse the config file
    # Download and unpack the install packages.
    self.uncompressed_paths = Download(self.log, self.data).download()
    # Excel header row: the kinds of data to collect.
    self.excel_vol_titles = self.get_excel_titles()
    # str.split already returns a one-element list when no comma is present.
    self.package_names = self.data['versions'].split(',')
def Download_Torrent_File(self):
    choose = int(input("select number: "))
    for i in self.Informations(page=self.pg):
        if choose == int(i[0]):
            os.system("clear")
            print(f"[+] {i[1]}")
            print("[+] getting magnet")
            content = self.Contents(f'https://1337x.to{i[5]}')
            magnet_link = content.find("div", "no-top-radius").a["href"]
            Download(magnet_link)
            print("\n\t download started .. \n")
def main():
    global _username, _domain, _classes, prevId
    init()
    print(Fore.WHITE + Style.BRIGHT + printBanner(), end='')
    with open(baseWritePath + prevId, 'r') as f:
        first = json.load(f)
    nextId = ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    first['NextId'] = nextId
    with open(baseWritePath + prevId, 'w') as f:
        json.dump(first, f)
    prevId = nextId
    content = waitAndReadFile(baseReadPath + prevId)
    print("[+] Connection successfully established!")
    time.sleep(3)
    print("[+] Patching ETW...")
    time.sleep(7)
    print("[+] Manually loading kernel32.dll to avoid API hooks.")
    time.sleep(7)
    print("[+] Manually loading advapi32.dll to avoid API hooks.")
    time.sleep(5)
    print("[+] Patching AMSI...")
    time.sleep(5)
    print("[+] H4ck th3 Pl4n3t!")
    userAndDomain = content['Output']
    userAndDomain = userAndDomain.split("\\")
    _domain = userAndDomain[0]
    _username = userAndDomain[1]
    _classes.append(Download())
    _classes.append(Base64encode())
    _classes.append(Base64decode())
    _classes.append(Compile())
    _classes.append(Inject())
    _classes.append(Powershell())
    _classes.append(Send())
    _classes.append(Impersonate())
    _classes.append(Exfiltrate())
    _classes.append(Runas())
    _classes.append(Shell())
    mainConsole()
    deinit()
def main():
    initial_time = time()
    print('Run...')
    descargas = Download('config.json')
    descargas.checkFolders()
    descargas.scanFolder()
    final_time = time()
    total_time = final_time - initial_time
    print(f'End - Time: {total_time} seconds')
def get_page_num(self):
    xml = Download(self._start_url).request()
    if xml is False:
        return 1
    last_url = xml.xpath('//a[@class="last"]')[0].xpath("@href")[0]
    # Pull the page count out of a URL like ".../index_12.html".
    html_names = re.findall(pattern=r"index_\d*\.html", string=last_url)
    if len(html_names) >= 1:
        pages_num = int(html_names[0].replace("index_", "").replace(".html", ""))
        return pages_num
    else:
        return 1
def process_queue():
    while True:
        # keep taking urls until there are none left to process
        try:
            url = urllist.pop()
            print('url', url)
            D = Download()
            D.Downloader(url)
        except KeyError:
            # currently no urls to process
            break
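# process_queue is a worker loop: each call keeps popping URLs until the
# shared collection is exhausted. A sketch of the usual way to drive such a
# loop, assuming urllist is a set shared across threads and Download is safe
# to use concurrently (both assumptions, not shown in the code above):
import threading

threads = [threading.Thread(target=process_queue) for _ in range(4)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()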
def download(self, file_url, file_path, number):
    download = Download()
    download_url = download.download
    url = self.__real_url_head + file_url
    path = self.__store_path + file_path
    file_name = path.split('/').pop()
    file_name_size = len(file_name)
    file_path = path[:-file_name_size - 1]  # strip the file name and its slash
    (target_size, real_size) = download_url(url, file_name, file_path,
                                            int(number), self.__files_number)
    self.__real_file_size += real_size
def init_from_json(self, json):
    self.name = json.get('name')
    self.description = json.get('description')
    self.organization_name = json.get('organization').get('name')
    self.political_level = json.get('organization').get('political_level')
    self.tags = json.get('tags')
    self.downloads = [
        Download(download, download.get('url'), self)
        for download in json['downloads']
    ]
    self.id = json.get('id')
    self.visits = json.get('visits', 0)
def download():
    data = json.loads(request.get_data(as_text=True))
    url_id = data['url_id']
    name = data['name']
    author = data['author']
    source = data['source']
    d = Download(name, author, url_id, source)
    is_ok = d.download()
    if is_ok:
        ret = {'state': 1}
    else:
        ret = {'state': 0}
    return jsonify(ret)
def transfer(self, session):
    path = os.path.join(self.release, self.name)
    with Download(path) as download:
        if download.is_complete(session["log"]):
            return
        size = format_size(self.bytes)
        session["log"].write("{} ({}/{}, ~{})\n".format(
            self.name, session["file"], session["files"], size))
        with ExitStack() as cleanup:
            decoder = yencread.FileDecoder(session["log"], PipeWriter())
            coroutine = self._receive(download, decoder)
            cleanup.enter_context(decoder.pipe.coroutine(coroutine))
            cleanup.callback(Progress.close, session["log"])
            for [segment, id] in self.iter_segments():
                number = int(segment.get("number")) - 1
                if download.is_done(number):
                    continue
                for _ in range(5):
                    try:
                        session["nntp"].body(id, file=decoder.pipe)
                    except nntp.failure_responses as err:
                        raise SystemExit(err)
                    except EOFError as err:
                        msg = format(err) or "Connection dropped"
                    except timeouts as err:
                        msg = format(err)
                    else:
                        break
                    session["log"].write(msg + "\n")
                    session["nntp"].log_time()
                    decoder.pipe.close()
                    # Decoder coroutine raises EOFError if input aborted
                    with suppress(EOFError):
                        cleanup.close()
                    session["nntp"].connect()
                    pipe = PipeWriter()
                    decoder = yencread.FileDecoder(session["log"], pipe)
                    coroutine = self._receive(download, decoder)
                    # TODO: should not re-open download etc
                    coroutine = decoder.pipe.coroutine(coroutine)
                    cleanup.enter_context(coroutine)
                else:
                    raise SystemExit("Failed retrieving {} segment {} "
                                     "<{}> (attempts: {})".format(
                                         self.name, 1 + number, id, 5))
            decoder.pipe.close()
def main():
    # print('Running.')
    # url = 'https://blog.csdn.net/GitChat'
    # download = Download()
    # articledb = ArticleDB(DB(*config))
    # spider = Spider(url, download, articledb)
    # spider.start()
    # print('Done.')

    # index
    url = 'https://blog.csdn.net/'
    download = Download()
    html = download.down_html(url, save=True)
    print(html)
def get_info_log(self, new_connection_dict: dict, fname: str) -> None:
    """
    Walks the file line by line; if the id,ip index exists in the
    dictionary, fetches the object associated with that index and adds
    the line to it, storing it only if it carries useful information.

    :param new_connection_dict:
    :param fname:
    :return:
    """
    with open(fname, 'r') as fp:
        for line in fp:
            info = parser_id_ip(line)
            if info is not None:
                if info in new_connection_dict:
                    # Parse the lines whose id,ip is already in the dictionary.
                    new_connection_dict[info].add_line(line)
                    if re.match(r'.*CMD:.*wget.*', line, re.IGNORECASE):
                        # If the line contains a wget, record that connection.
                        self._connection_wget.append(info)
                else:
                    # Lines with an id,ip but no "New connection" entry,
                    # not yet in the dictionary.
                    con_aux = ConnectionAux(parser_ip_any_line(line), '', '')
                    con_aux.set_id(parser_id_to_session(line))
                    new_connection_dict[info] = NewConnection(
                        con_aux, self._logger, self._geoip2_db)
                    new_connection_dict[info].add_line(line)
            else:
                # Lines without an id,ip: every line whose id,ip is not in
                # the dictionary and that is not a "New connection" goes to
                # the auxiliary file of unhandled lines, since "New
                # connection" lines have already been handled.
                if not re.match(r'.*New connection:.*', line,
                                re.IGNORECASE) and len(line) > 2:
                    regex = (r'^.*Downloaded URL \(b?\'(.*)\'\) '
                             r'with SHA-\d+ (\w+) to (.*)$')
                    if re.match(regex, line, re.IGNORECASE):
                        d = re.search(regex, line, re.IGNORECASE)
                        download = Download(d.group(1), d.group(2), d.group(3),
                                            parser_date_time(line))
                        self._listCommandWget.append(download)
    self.update_command_connection(new_connection_dict)
    for conect in new_connection_dict.values():
        if conect.is_completed():
            write_file(conect.get_json(), self._log_completed, 'a')
        elif conect.is_session():
            write_file(conect.get_json(), self._log_aux_session, 'a')
        else:
            write_file(conect.get_json(), self._log_aux_no_session, 'a')
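# An illustrative log line (a single line, wrapped here) that the
# "Downloaded URL" regex above is meant to match. This is a made-up example
# in the honeypot-log style the parser expects; the IP, hash, and path are
# placeholders, not data from the original project:
#
#   ... Downloaded URL (b'http://198.51.100.7/bins.sh') with SHA-256
#   3a7bd3e2360a3d29eea436fcfb7e44c735d117c42d1c1835420b6b9942dd4f1b
#   to var/lib/cowrie/downloads/3a7bd3e2360a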