def _copy_chunks(self):
    """Merge every downloaded chunk file into the first one, verify each
    chunk's size, then move the merged file to the final download path
    and drop the chunk-info file."""
    first = format.path(self.info.get_chunk_name(0))  #: initial chunk name
    if self.info.get_count() > 1:
        # append every remaining chunk onto the first chunkfile
        with io.open(first, "rb+") as dest:
            for idx in range(1, self.info.get_count()):
                # seek to beginning of chunk, to get rid of overlapping chunks
                dest.seek(self.info.get_chunk_range(idx - 1)[1] + 1)
                part = format.path("{0}.chunk{1:d}".format(self.path, idx))
                blocksize = 32 << 10  # 32 KiB copy buffer
                with io.open(part, mode='rb') as src:
                    # copy piecewise, consumes less memory
                    while True:
                        piece = src.read(blocksize)
                        if not piece:
                            break
                        dest.write(piece)
                # chunk ended before its declared range -> data is missing
                if dest.tell() < self.info.get_chunk_range(idx)[1]:
                    remove(first)
                    self.info.remove()  #: there are probably invalid chunks
                    raise Exception(
                        "Downloaded content was smaller than expected. Try to reduce download connections")
                remove(part)  #: remove chunk
    if self.name:
        # a real file name was resolved meanwhile -> retarget the path
        self.path = format.path(os.path.dirname(self.path), self.name)
    shutil.move(first, format.path(self.path))
    self.info.remove()  #: remove info file
def _copy_chunks(self): init = format.path(self.info.get_chunk_name(0)) #: initial chunk name if self.info.get_count() > 1: with io.open(init, "rb+") as fo: #: first chunkfile for i in range(1, self.info.get_count()): # input file fo.seek( self.info.get_chunk_range(i - 1)[1] + 1) #: seek to beginning of chunk, to get rid of overlapping chunks fname = format.path("{0}.chunk{1:d}".format(self.path, i)) buf = 32 << 10 with io.open(fname, mode='rb') as fi: while True: #: copy in chunks, consumes less memory data = fi.read(buf) if not data: break fo.write(data) if fo.tell() < self.info.get_chunk_range(i)[1]: remove(init) self.info.remove() #: there are probably invalid chunks raise Exception( "Downloaded content was smaller than expected. Try to reduce download connections") remove(fname) #: remove chunk if self.name: self.path = format.path(os.path.dirname(self.path), self.name) shutil.move(init, format.path(self.path)) self.info.remove() #: remove info file
def download(self, ip, port, filename, irc, progress_notify=None):
    """Receive a file over a DCC connection and write it to disk.

    :param ip: remote host address
    :param port: remote port
    :param filename: target file path (a ``name-N.ext`` variant is picked
        if it already exists)
    :param irc: irc connection used for keep-alive traffic
    :param progress_notify: optional callback invoked with ``self.percent``
    :return: the file name actually written
    :raises Abort: when ``self.abort`` is set during the transfer
    """
    ircbuffer = ""
    last_update = time()
    cum_recv_len = 0
    with closing(self.create_socket()) as dccsock:
        dccsock.settimeout(self.timeout)
        dccsock.connect((ip, port))
        if os.path.exists(filename):
            # find a free "name-N.ext" variant instead of overwriting
            i = 0
            name_parts = filename.rpartition(".")
            while True:
                newfilename = "{0}-{1:d}{2}{3}".format(
                    name_parts[0], i, name_parts[1], name_parts[2])
                i += 1
                if not os.path.exists(newfilename):
                    filename = newfilename
                    break
        with io.open(filename, mode='wb') as fp:
            # recv loop for dcc socket
            while True:
                if self.abort:
                    # dccsock.close()
                    # NOTE(review): file is removed while fp is still open —
                    # this fails on Windows; confirm target platforms
                    remove(filename, trash=True)
                    raise Abort
                self._keep_alive(irc, ircbuffer)
                data = dccsock.recv(4096)
                data_len = len(data)
                self.recv += data_len
                cum_recv_len += data_len
                now = time()
                timespan = now - last_update
                if timespan > 1:
                    # recompute speed roughly once per second
                    self.speed = cum_recv_len // timespan
                    cum_recv_len = 0
                    last_update = now
                    if progress_notify:
                        progress_notify(self.percent)
                if not data:
                    break
                fp.write(data)
                # acknowledge data by sending number of received bytes;
                # BUG FIX: send() may transmit only part of the 4-byte ack,
                # sendall() guarantees the whole packet goes out
                dccsock.sendall(struct.pack('!I', self.recv))
    return filename
def setUp(self):
    """Remove leftover downloads and old test reports before each test."""
    PluginTester.setUp(self)
    for entry in self.files:
        target = os.path.join(DL_DIR, entry)
        if os.path.exists(target):
            remove(target, trash=True)
    # folder for reports
    report_dir = os.path.join(self.__class__.__name__)
    if os.path.exists(report_dir):
        for entry in os.listdir(report_dir):
            remove(os.path.join(report_dir, entry), trash=True)
def shutdown(self, cleanup=None):
    """Stop the core, optionally delete temporary files, and terminate.

    :param cleanup: delete the temp dir; defaults to ``self._cleanup``
    :return: whatever ``self.terminate`` returns
    """
    if cleanup is None:
        cleanup = self._cleanup
    try:
        if self.is_alive():
            self._stop()
            self.log.info(_("Exiting pyLoad ..."))
            self.db.shutdown()
        if cleanup:
            self.log.info(_("Deleting temp files ..."))
            remove(self.tmpdir, ignore_errors=True)
    finally:
        # BUG FIX: the original `return` inside `finally` silently swallowed
        # any exception raised by the shutdown steps above; terminate still
        # always runs, but errors now propagate to the caller.
        # NOTE(review): `self.terminate(self)` passes self explicitly — if
        # terminate is a bound method this double-passes self; confirm.
        result = self.terminate(self)
    return result
def setUp(self):
    """Clear previously downloaded files and any report folder contents."""
    PluginTester.setUp(self)
    for entry in self.files:
        remove(os.path.join(DL_DIR, entry), trash=True, ignore_errors=True)
    # folder for reports
    report_dir = os.path.join(self.__class__.__name__)
    if os.path.exists(report_dir):
        for entry in os.listdir(report_dir):
            remove(os.path.join(report_dir, entry), trash=True)
    return None
def _decrypt(self, urls):
    """
    Internal method to select decrypting method

    :param urls: List of urls/content
    :return: (links, packages)
    """
    klass = self.__class__
    # separate local and remote files
    local_content, urls = self.get_local_content(urls)
    collected = []

    if urls and hasmethod(klass, "decrypt"):
        # legacy plugin API: one pyfile per url
        self.log_debug("Deprecated .decrypt() method in Crypter plugin")
        for url in urls:
            self.pyfile = PyFileMockup(url, klass.__name__)
            self.setup()
            self.decrypt(self.pyfile)
            collected.extend(self.convert_packages())
    elif urls:
        # prefer batch decryption; fall back to per-url decryption
        # (this will raise error if not implemented)
        try:
            self.setup()
            collected = to_list(self.decrypt_urls(urls), [])
        except NotImplementedError:
            for url in urls:
                self.setup()
                collected.extend(to_list(self.decrypt_url(url), []))

    for fname, fdata in local_content:
        self.setup()
        collected.extend(to_list(self.decrypt_file(fdata), []))
        try:
            if fname.startswith("tmp_"):
                remove(fname)
        except IOError:
            self.log_warning(_("Could not delete file '{0}'").format(fname))
            # self.pyload.print_exc()

    return to_link_list(collected)
def check_download(self, rules, api_size=0, max_size=50000, delete=True, read_size=0):
    """
    Checks the content of the last downloaded file, re match is saved to `last_check`

    :param rules: dict with names and rules to match (compiled regexp or strings)
    :param api_size: expected file size
    :param max_size: if the file is larger then it wont be checked
    :param delete: delete if matched
    :param read_size: amount of bytes to read from files larger then max_size
    :return: dictionary key of the first rule that matched
    """
    if not os.path.exists(self.last_download):
        return None

    fsize = os.stat(self.last_download).st_size
    if api_size and api_size <= fsize:
        return None  # at least as large as the api promised -> looks fine
    elif fsize > max_size and not read_size:
        return None  # too big to scan and no partial read requested

    self.pyload.log.debug("Download Check triggered")

    with io.open(self.last_download, mode='rb') as fp:
        content = fp.read(read_size if read_size else -1)

    # produces encoding errors, better log to other file in the future?
    #self.pyload.log.debug("Content: {0}".format(content))
    for name, rule in rules.items():
        if isinstance(rule, str):
            # NOTE(review): `content` is bytes; on Python 3 `str in bytes`
            # raises TypeError — confirm intended interpreter / rule types
            if rule in content:
                if delete:
                    remove(self.last_download, trash=True)
                return name
        elif hasattr(rule, "search"):
            m = rule.search(content)
            if m:
                if delete:
                    remove(self.last_download, trash=True)
                self.last_check = m
                return name
def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
    """Run the external tesseract OCR binary on ``self.image``.

    Saves the captcha image as a temporary TIFF, optionally writes a
    character-whitelist config (subset of digits/lower/upper), invokes
    tesseract via ``self.run`` and stores the recognized text in
    ``self.result_captcha``. Temp files are removed best-effort.

    :param subset: restrict recognition to a character whitelist
    :param digits: include 0-9 in the whitelist
    :param lowercase: include a-z in the whitelist
    :param uppercase: include A-Z in the whitelist
    """
    # self.log.debug("create tmp tif")
    # tmp = tempfile.NamedTemporaryFile(suffix=".tif")
    tmp_path = os.path.join("tmpTif_{0}.tif".format(self.__name__))
    # touch the file so its .name can be handed to tesseract later
    tmp = io.open(tmp_path, mode='wb')
    tmp.close()
    # self.log.debug("create tmp txt")
    # tmp_txt = tempfile.NamedTemporaryFile(suffix=".txt")
    tmp_txt_path = os.path.join("tmp_txt_{0}.txt".format(self.__name__))
    tmp_txt = io.open(tmp_txt_path, mode='wb')
    tmp_txt.close()
    self.log.debug("save tiff")
    self.image.save(tmp.name, 'TIFF')
    if os.name == 'nt':
        tessparams = [os.path.join(COREDIR, "tesseract", "tesseract.exe")]
    else:
        tessparams = ['tesseract']
    # tesseract derives the output .txt path from the base name (no ext)
    tessparams.extend(
        [os.path.abspath(tmp.name), os.path.abspath(tmp_txt.name).replace(".txt", "")])
    if subset and (digits or lowercase or uppercase):
        # self.log.debug("create temp subset config")
        # tmp_sub = tempfile.NamedTemporaryFile(suffix=".subset")
        with io.open(os.path.join("tmp_sub_{0}.subset".format(self.__name__)), mode='wb') as tmp_sub:
            # NOTE(review): file is opened 'wb' but str is written — fails on
            # Python 3 (bytes required); confirm interpreter version
            tmp_sub.write("tessedit_char_whitelist ")
            if digits:
                tmp_sub.write("0123456789")
            if lowercase:
                tmp_sub.write("abcdefghijklmnopqrstuvwxyz")
            if uppercase:
                tmp_sub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
            tmp_sub.write("\n")
            tessparams.append("nobatch")
            tessparams.append(os.path.abspath(tmp_sub.name))
    self.log.debug("run tesseract")
    self.run(tessparams)
    self.log.debug("read txt")
    try:
        with io.open(tmp_txt.name) as fp:
            self.result_captcha = fp.read().replace("\n", "")
    except Exception:
        self.result_captcha = ""
    self.log.debug(self.result_captcha)
    try:
        remove(tmp.name)
        remove(tmp_txt.name)
        if subset and (digits or lowercase or uppercase):
            remove(tmp_sub.name)
    except Exception:
        pass
def init(self):
    """
    Main loop, which executes commands.

    Opens the sqlite database, migrates or recreates it when the stored
    schema version is older than ``DB_VERSION``, then creates tables.
    """
    version = self._check_version()
    self.conn = sqlite3.connect(self.DB_FILE)
    os.chmod(self.DB_FILE, 0o600)  # db may hold credentials -> owner-only
    self.c = self.conn.cursor()
    if version is not None and version < DB_VERSION:
        success = self._convert_db(version)
        # delete database
        if not success:
            self.c.close()
            self.conn.close()
            try:
                self.manager.pyload.log.warning(
                    _("Database was deleted due to incompatible version"))
            except Exception:
                # logger may not be available this early
                print("Database was deleted due to incompatible version")
            remove(self.VERSION_FILE)
            # keep the old db around as a backup instead of deleting it
            shutil.move(self.DB_FILE, self.DB_FILE + ".bak")
            with io.open(self.VERSION_FILE, mode='wb') as fp:
                # BUG FIX: io.open in binary mode requires bytes — writing
                # str(DB_VERSION) raises TypeError; encode the version string
                fp.write(str(DB_VERSION).encode('utf-8'))
            self.conn = sqlite3.connect(self.DB_FILE)
            os.chmod(self.DB_FILE, 0o600)
            self.c = self.conn.cursor()
    self._create_tables()
    self.conn.commit()
def decrypt_captcha(self, url, get={}, post={}, cookies=True, forceuser=False,
                    imgtype='jpg', result_type='textual'):
    """
    Loads a captcha and decrypts it with ocr, plugin, user input

    :param url: url of captcha image
    :param get: get part for request
    :param post: post part for request
    :param cookies: True if cookies should be enabled
    :param forceuser: if True, ocr is not used
    :param imgtype: Type of the Image
    :param result_type: 'textual' if text is written on the captcha or
        'positional' for captcha where the user have to click on a
        specific region on the captcha
    :return: result of decrypting
    """
    img = self.load(url, get=get, post=post, cookies=cookies)
    # short pseudo-unique id from the current time's fractional part
    id = "{0:.2f}".format(time.time())[-6:].replace(".", "")
    # dump the image to a temp file; fp.name stays usable after close
    with io.open(os.path.join("tmp_captcha_{0}_{1}.{2}".format(
            self.__name__, id, imgtype)), mode='wb') as fp:
        fp.write(img)
    name = "{0}OCR".format(self.__name__)
    has_plugin = name in self.pyload.pgm.get_plugins("internal")
    if self.pyload.captcha:
        OCR = self.pyload.pgm.load_class("internal", name)
    else:
        OCR = None
    if OCR and not forceuser:
        # small random delay so requests don't look automated
        time.sleep(random.randint(3000, 5000) // 1000)
        self.check_abort()
        ocr = OCR()
        result = ocr.get_captcha(fp.name)
    else:
        # hand the captcha to a client / user via a captcha task
        task = self.pyload.exm.create_captcha_task(
            img, imgtype, fp.name, self.__name__, result_type)
        self.task = task
        while task.is_waiting():
            # NOTE(review): `self.abort()` is called here while the sibling
            # download() reads `self.abort` as an attribute — confirm which
            if self.abort():
                self.pyload.exm.remove_task(task)
                raise Abort
            time.sleep(1)
        # TODO: task handling
        self.pyload.exm.remove_task(task)
        if task.error and has_plugin:  #: ignore default error message since the user could use OCR
            self.fail(
                _("Pil and tesseract not installed and no Client connected for captcha decrypting"))
        elif task.error:
            self.fail(task.error)
        elif not task.result:
            self.fail(_("No captcha result obtained in appropriate time"))
        result = task.result
        self.pyload.log.debug("Received captcha result: {0}".format(result))
    if not self.pyload.debug:
        # keep the temp image around in debug mode for inspection
        try:
            remove(fp.name)
        except Exception:
            pass
    return result
def remove(self):
    """Delete the on-disk ``<name>.chunks`` info file for this download."""
    chunks_file = format.path("{0}.chunks".format(self.name))
    remove(chunks_file)
def setUpClass(cls):
    """Boot a Core instance and purge debug reports from earlier runs."""
    cls.core = Core()
    qualname = "{0}.{1}".format(cls.__module__, cls.__name__)
    for report in glob(os.path.join(qualname, "debug_*")):
        remove(report, trash=True)
def _download(self, chunks, resume):
    """Drive the curl-multi download loop for this file.

    Starts with a single initial chunk, splits into up to ``chunks``
    parallel connections once the size is known (when supported),
    falls back to a single connection if parallel chunks fail, and
    finally merges the chunk files via ``_copy_chunks``.

    :param chunks: requested number of connections
    :param resume: True to continue a previous partial download
    :raises Abort: when ``self.do_abort`` is set
    :raises pycurl.error / ResponseException: when the initial chunk fails
    """
    if not resume:
        self.info.clear()
        self.info.add_chunk("{0}.chunk0".format(self.path), (0, 0))  #: create an initial entry
    self.chunks = []
    # initial chunk that will load complete file (if needed)
    init = CurlChunk(0, self, None, resume)
    self.chunks.append(init)
    self.manager.add_handle(init.get_handle())
    last_finish_check = 0
    last_time_check = 0
    chunks_done = set()  #: set of curl handles that are finished
    chunks_created = False
    done = False
    if self.info.get_count() > 1:  #: This is a resume, if we were chunked originally assume still can
        self.chunk_support = True
    while True:
        # need to create chunks
        if not chunks_created and self.chunk_support and self.size:  #: will be set later by first chunk
            # NOTE(review): XOR toggles the Resumable flag; confirm it is
            # guaranteed set before this point
            self.flags ^= Connection.Resumable
            if not resume:
                self.info.set_size(self.size)
                self.info.create_chunks(chunks)
                self.info.save()
            chunks = self.info.get_count()
            init.set_range(self.info.get_chunk_range(0))
            for i in range(1, chunks):
                c = CurlChunk(i, self, self.info.get_chunk_range(i), resume)
                handle = c.get_handle()
                if handle:
                    self.chunks.append(c)
                    self.manager.add_handle(handle)
                else:
                    # close immediately
                    self.pyload.log.debug("Invalid curl handle -> closed")
                    c.close()
            chunks_created = True
        while True:
            ret, num_handles = self.manager.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break
        t = time()
        # reduce these calls
        # when num_q is 0, the loop is exited
        while last_finish_check + 0.5 < t:
            # list of failed curl handles
            failed = []
            ex = None  #: save only last exception, we can only raise one anyway
            num_q, ok_list, err_list = self.manager.info_read()
            for c in ok_list:
                chunk = self.find_chunk(c)
                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    # BUG FIX: e.message is Python-2-only; str(e) matches the
                    # sibling implementation and works on both versions
                    self.pyload.log.debug(
                        "Chunk {0:d} failed: {1}".format(chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(c)
            for c in err_list:
                curl, errno, msg = c
                chunk = self.find_chunk(curl)
                # test if chunk was finished
                if errno != 23 or "0 !=" not in msg:
                    failed.append(chunk)
                    ex = pycurl.error(errno, msg)
                    self.pyload.log.debug(
                        "Chunk {0:d} failed: {1}".format(chunk.id + 1, ex))
                    continue
                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    self.pyload.log.debug(
                        "Chunk {0:d} failed: {1}".format(chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(curl)
            if not num_q:  #: no more info to get
                # check if init is not finished so we reset download connections
                # note that other chunks are closed and everything
                # downloaded with initial connection
                if failed and init not in failed and init.c not in chunks_done:
                    self.pyload.log.error(
                        _("Download chunks failed, fallback to single connection | {0}".format(ex)))
                    # list of chunks to clean and remove
                    to_clean = [x for x in self.chunks if x is not init]
                    for chunk in to_clean:
                        self.close_chunk(chunk)
                        self.chunks.remove(chunk)
                        remove(format.path(self.info.get_chunk_name(chunk.id)))
                    # let first chunk load the rest and update the info file
                    init.reset_range()
                    self.info.clear()
                    self.info.add_chunk("{0}.chunk0".format(self.path), (0, self.size))
                    self.info.save()
                elif failed:
                    raise ex
                last_finish_check = t
                if len(chunks_done) >= len(self.chunks):
                    if len(chunks_done) > len(self.chunks):
                        self.pyload.log.warning(
                            _("Finished download chunks size incorrect, please report bug"))
                    done = True  #: all chunks loaded
                break
        if done:
            break  #: all chunks loaded
        # calc speed once per second, averaging over 3 seconds
        if last_time_check + 1 < t:
            diff = [c.arrived - (self.last_arrived[i] if len(self.last_arrived) > i else 0)
                    for i, c in enumerate(self.chunks)]
            self.last_speeds[1] = self.last_speeds[0]
            self.last_speeds[0] = self.speeds
            self.speeds = [float(a) // (t - last_time_check) for a in diff]
            self.last_arrived = [c.arrived for c in self.chunks]
            last_time_check = t
        if self.do_abort:
            raise Abort
        self.manager.select(1)
    for chunk in self.chunks:
        chunk.flush_file()  #: make sure downloads are written to disk
    self._copy_chunks()
def _download(self, chunks, resume):
    """Drive the curl-multi download loop for this file.

    Starts with one initial chunk, splits into up to ``chunks`` parallel
    connections once the file size is known (when chunking is supported),
    falls back to a single connection when parallel chunks fail, and
    finally flushes and merges the chunk files via ``_copy_chunks``.

    :param chunks: requested number of connections
    :param resume: True to continue a previous partial download
    :raises Abort: when ``self.do_abort`` is set
    """
    if not resume:
        self.info.clear()
        self.info.add_chunk("{0}.chunk0".format(self.path),
                            (0, 0))  #: create an initial entry
    self.chunks = []
    # initial chunk that will load complete file (if needed)
    init = CurlChunk(0, self, None, resume)
    self.chunks.append(init)
    self.manager.add_handle(init.get_handle())
    last_finish_check = 0
    last_time_check = 0
    chunks_done = set()  #: set of curl handles that are finished
    chunks_created = False
    done = False
    if self.info.get_count(
    ) > 1:  #: This is a resume, if we were chunked originally assume still can
        self.chunk_support = True
    while True:
        # need to create chunks
        if not chunks_created and self.chunk_support and self.size:  #: will be set later by first chunk
            # NOTE(review): XOR toggles the Resumable flag; presumably it is
            # always set at this point — confirm
            self.flags ^= Connection.Resumable
            if not resume:
                self.info.set_size(self.size)
                self.info.create_chunks(chunks)
                self.info.save()
            chunks = self.info.get_count()
            init.set_range(self.info.get_chunk_range(0))
            for i in range(1, chunks):
                c = CurlChunk(i, self, self.info.get_chunk_range(i), resume)
                handle = c.get_handle()
                if handle:
                    self.chunks.append(c)
                    self.manager.add_handle(handle)
                else:
                    # close immediately
                    self.pyload.log.debug("Invalid curl handle -> closed")
                    c.close()
            chunks_created = True
        while True:
            ret, num_handles = self.manager.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break
        t = time()
        # reduce these calls
        # when num_q is 0, the loop is exited
        while last_finish_check + 0.5 < t:
            # list of failed curl handles
            failed = []
            ex = None  #: save only last exception, we can only raise one anyway
            num_q, ok_list, err_list = self.manager.info_read()
            for c in ok_list:
                chunk = self.find_chunk(c)
                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(
                        chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(c)
            for c in err_list:
                curl, errno, msg = c
                chunk = self.find_chunk(curl)
                # test if chunk was finished
                # (curl error 23 with "0 !=" means write aborted at range end)
                if errno != 23 or "0 !=" not in msg:
                    failed.append(chunk)
                    ex = pycurl.error(errno, msg)
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(
                        chunk.id + 1, ex))
                    continue
                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(
                        chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(curl)
            if not num_q:  #: no more info to get
                # check if init is not finished so we reset download connections
                # note that other chunks are closed and everything
                # downloaded with initial connection
                if failed and init not in failed and init.c not in chunks_done:
                    self.pyload.log.error(
                        _("Download chunks failed, fallback to single connection | {0}"
                          .format(ex)))
                    # list of chunks to clean and remove
                    to_clean = [x for x in self.chunks if x is not init]
                    for chunk in to_clean:
                        self.close_chunk(chunk)
                        self.chunks.remove(chunk)
                        remove(
                            format.path(self.info.get_chunk_name(
                                chunk.id)))
                    # let first chunk load the rest and update the info
                    # file
                    init.reset_range()
                    self.info.clear()
                    self.info.add_chunk("{0}.chunk0".format(self.path),
                                        (0, self.size))
                    self.info.save()
                elif failed:
                    raise ex
                last_finish_check = t
                if len(chunks_done) >= len(self.chunks):
                    if len(chunks_done) > len(self.chunks):
                        self.pyload.log.warning(
                            _("Finished download chunks size incorrect, please report bug"
                              ))
                    done = True  #: all chunks loaded
                break
        if done:
            break  #: all chunks loaded
        # calc speed once per second, averaging over 3 seconds
        if last_time_check + 1 < t:
            diff = [
                c.arrived -
                (self.last_arrived[i] if len(self.last_arrived) > i else 0)
                for i, c in enumerate(self.chunks)
            ]
            self.last_speeds[1] = self.last_speeds[0]
            self.last_speeds[0] = self.speeds
            self.speeds = [float(a) // (t - last_time_check) for a in diff]
            self.last_arrived = [c.arrived for c in self.chunks]
            last_time_check = t
        if self.do_abort:
            raise Abort
        self.manager.select(1)
    for chunk in self.chunks:
        chunk.flush_file()  #: make sure downloads are written to disk
    self._copy_chunks()
def _remove_pid(self):
    """Release the pid-file lock (best effort) and delete the pid file."""
    try:
        fcntl.flock(self._lockfd, fcntl.LOCK_UN)
    except IOError:
        # lock may already be gone; deletion below still proceeds
        pass
    remove(self.pidfile, ignore_errors=True)
def decrypt_captcha(self, url, get={}, post={}, cookies=True, forceuser=False,
                    imgtype='jpg', result_type='textual'):
    """
    Loads a captcha and decrypts it with ocr, plugin, user input

    :param url: url of captcha image
    :param get: get part for request
    :param post: post part for request
    :param cookies: True if cookies should be enabled
    :param forceuser: if True, ocr is not used
    :param imgtype: Type of the Image
    :param result_type: 'textual' if text is written on the captcha or
        'positional' for captcha where the user have to click on a
        specific region on the captcha
    :return: result of decrypting
    """
    img = self.load(url, get=get, post=post, cookies=cookies)
    # pseudo-unique suffix built from the current time's fractional part
    id = "{0:.2f}".format(time.time())[-6:].replace(".", "")
    tmp_name = os.path.join("tmp_captcha_{0}_{1}.{2}".format(self.__name__, id, imgtype))
    with io.open(tmp_name, mode='wb') as fp:
        fp.write(img)

    name = "{0}OCR".format(self.__name__)
    has_plugin = name in self.pyload.pgm.get_plugins("internal")
    OCR = self.pyload.pgm.load_class("internal", name) if self.pyload.captcha else None

    if OCR and not forceuser:
        # small random delay so requests do not look automated
        time.sleep(random.randint(3000, 5000) // 1000.0)
        self.check_abort()
        ocr = OCR()
        result = ocr.get_captcha(fp.name)
    else:
        # delegate to a connected client / the user via a captcha task
        task = self.pyload.itm.create_captcha_task(
            img, imgtype, fp.name, self.__name__, result_type)
        self.task = task
        while task.is_waiting():
            if self.abort():
                self.pyload.itm.remove_task(task)
                raise Abort
            time.sleep(1)
        # TODO: task handling
        self.pyload.itm.remove_task(task)
        if task.error and has_plugin:  #: ignore default error message since the user could use OCR
            self.fail(
                _("Pil and tesseract not installed and no Client connected for captcha decrypting"))
        elif task.error:
            self.fail(task.error)
        elif not task.result:
            self.fail(_("No captcha result obtained in appropriate time"))
        result = task.result
        self.pyload.log.debug("Received captcha result: {0}".format(result))

    if not self.pyload.debug:
        # keep the temp image in debug mode for inspection
        try:
            remove(fp.name)
        except Exception:
            pass
    return result