def _choose_best_file(self, files):
    """Pick the largest file (by on-disk size) among `files`.

    :param files: Iterable of file records carrying a `.path` attribute.
    :return: Tuple of (largest file, list of the remaining files).
    """
    base_dir = settings.get("output.base_dir")

    def disk_size(record):
        # Size is measured on disk, relative to the configured output directory.
        return SanitizedRelFile(base=base_dir, file_path=record.path).size()

    ranked = sorted(files, key=disk_size, reverse=True)
    return ranked[0], ranked[1:]
def test_put(self):
    """ Put should work as expected """
    settings.put('auth.oauth_key', 'value')
    stored = settings.get('oauth_key', cat='auth')
    self.assertEqual(stored, 'value', msg='Failed to set correct value!')
    # An un-castable value must be rejected with a TypeError.
    with self.assertRaises(TypeError, msg="Failed to catch invalid value!"):
        settings.put('interface.start_server', 'invalid')
def load(self):
    """ Threaded loading of elements. """
    # Re-hydrate settings and the DB connection inside this process/thread.
    settings.from_json(self.settings)
    sql.init_from_settings()
    self._session = sql.session()
    self.progress.set_scanning(True)
    retry_failed = settings.get('processing.retry_failed')
    # Query for all unhandled URLs, and submit them before scanning for new Posts.
    # NOTE(review): `retry_failed and sql.URL.failed == True` mixes a Python bool with a
    # SQLAlchemy column expression — when retry_failed is falsy the right operand collapses
    # to the literal False; otherwise it is the column comparison. Relies on SQLAlchemy's
    # coercion of `expr | False` — confirm this is intended.
    unfinished = self._session\
        .query(sql.URL)\
        .filter((sql.URL.processed == False) | (retry_failed and sql.URL.failed == True))\
        .all()
    self._push_url_list(unfinished)
    self._scan_sources()
    self.progress.set_scanning(False)
    # Wait for any remaining ACKS to come in, before closing the writing pipe.
    # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
    while len(self._open_ack) > 0 and not self._stop_event.is_set():
        self._handle_acks(timeout=0.5)
    print("Finished loading.")
    sql.close()
def load(self):
    """ Threaded loading of elements. """
    # Re-hydrate settings and the DB connection inside this process/thread.
    settings.from_json(self.settings)
    sql.init_from_settings()
    self._session = sql.session()
    t_start = datetime.now() #vy
    print("Started loading.") #vy
    self.progress.set_scanning(True)
    retry_failed = settings.get('processing.retry_failed')
    # Query for all unhandled URLs, and submit them before scanning for new Posts.
    # NOTE(review): `retry_failed and ...` mixes a Python bool with SQLAlchemy column
    # expressions — when retry_failed is falsy the right operand collapses to the literal
    # False; otherwise failed URLs are retried unless their failure_reason contains '404'.
    unfinished = self._session\
        .query(sql.URL)\
        .filter((sql.URL.processed == False) | \
            (retry_failed and sql.URL.failed and \
            sql.not_(sql.URL.failure_reason.contains('404'))))\
        .all()
    print("Loading %s unfinished urls" % len(unfinished))
    self._push_url_list(unfinished)
    self._scan_sources()
    self.progress.set_scanning(False)
    # Wait for any remaining ACKS to come in, before closing the writing pipe.
    # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
    while len(self._open_ack) > 0 and not self._stop_event.is_set():
        self._handle_acks(timeout=1.0, clear=True)
    print("Finished loading.")
    print("Elapsed time: %s" % str(datetime.now() - t_start)) #vy
    sql.close()
def api_search_posts(fields, term):
    """Search Posts matching `term` across the given `fields`.

    Only posts that still have at least one downloaded file on disk are returned.

    :param fields: The Post fields to search within.
    :param term: The search term (surrounding '%' wildcards are stripped).
    :return: List of dicts describing each matching post and its on-disk files.
    """
    base_dir = settings.get("output.base_dir")
    searcher = sql.PostSearcher(_session)
    results = []
    for post in searcher.search_fields(fields, term.strip("%")):
        found = []
        for url in post.urls:
            if not url.file:
                print('Post URL Missing a File:', url)
                continue
            rel = SanitizedRelFile(base=base_dir, file_path=url.file.path)
            if rel.is_file():
                found.append({'token': url.file.id, 'path': rel.absolute()})
        if not found:
            continue  # Skip posts whose files have all been removed.
        results.append({
            'reddit_id': post.reddit_id,
            'author': post.author,
            'type': post.type,
            'title': post.title,
            'body': post.body,
            'parent_id': post.parent_id,
            'subreddit': post.subreddit,
            'over_18': post.over_18,
            'created_utc': post.created_utc,
            'num_comments': post.num_comments,
            'score': post.score,
            'source_alias': post.source_alias,
            'files': found
        })
    return results
def setUp(self):
    """One-time setup: start the WebUI server in a daemon thread and open a DB session.

    Uses module-level globals so the (expensive) server start runs only once
    across all tests in this class.
    """
    global download_ran, session, thread
    if not download_ran:
        download_ran = True
        self.wui = WebUI('test_version')
        # NOTE(review): these settings are read BEFORE settings.load() below, so they
        # see defaults/previously-loaded values — confirm this ordering is intended.
        self.db_path = join(settings.get('output.base_dir'), 'manifest.sqlite')
        self.url = 'http://%s:%s/index.html#' % (settings.get('interface.host'), settings.get('interface.port'))
        settings.load(self.settings_file)
        settings.put('interface.start_server', True)
        sql.init_from_settings()
        session = sql.session()
        thread = Thread(target=self.wui.display)
        # FIX: Thread.setDaemon() is deprecated (Python 3.10+); assign the attribute instead.
        thread.daemon = True
        thread.start()
        self.assertTrue(self.wui.waitFor(10), msg='WebUI Failed to start!')
def login():
    """Authenticate the global Reddit session using the stored OAuth refresh token.

    :raises ConnectionError: If no refresh token has been configured.
    """
    global _user, _logged_in
    token = settings.get('auth.refresh_token')
    if not token:
        raise ConnectionError('Missing the Refresh Token from Reddit! Cannot auth.')
    stringutil.print_color('yellow', "Authenticating via OAuth...")
    me = _reddit.user.me()
    _user = me
    stringutil.print_color('yellow', "Authenticated as [%s]\n" % me.name)
    _logged_in = True
def _create_downloaders(self):
    """Build one Downloader per configured concurrent download thread.

    :return: List of (unstarted) Downloader instances wired to the loader's queues.
    """
    count = settings.get('threading.concurrent_downloads')
    return [
        Downloader(
            reader=self.loader.get_reader(),
            ack_queue=self.loader.get_ack_queue(),
            settings_json=settings.to_json())
        for _ in range(count)
    ]
def display(self):
    """Launch the web UI (blocking) and idle until it is stopped.

    :return: False immediately if the server has already been started.
    """
    if started:
        return False
    web_dir = os.path.join(os.path.dirname(__file__), '../web/')
    file_dir = os.path.abspath(settings.get("output.base_dir"))
    start(web_dir=web_dir, file_dir=file_dir, rmd_version=self.rmd_version)
    # Park this thread; the eel server runs until `stopped` is flipped elsewhere.
    while not stopped:
        eel.sleep(1)
def test_conversion(self):
    """ Settings should cast values correctly """
    port = settings.get('interface.port', full_obj=True)
    port.set(10)
    self.assertEqual(port.val(), 10, msg='Setting set() failed!')
    port.set('1337.1')
    self.assertEqual(port.val(), 1337, msg='Setting set() string->int failed!')
    server = settings.get('interface.start_server', full_obj=True)
    server.set('Yes')
    self.assertTrue(server.val(), msg='Failed to cast boolean True from "Yes".')
    server.set('n')
    self.assertFalse(server.val(), msg='Failed to cast boolean True from "n".')
    server.set(1)
    self.assertTrue(server.val(), msg='Failed to cast boolean True from 1.')
    # A string that cannot be interpreted as a boolean must raise.
    with self.assertRaises(TypeError, msg='Failed to catch Type error!'):
        server.set('fake')
def init_from_settings():
    """ Builds the database file using the Settings currently loaded. """
    # The manifest is deliberately stored in a separate directory from the downloads
    # ("output.manifest_for_sqlite_dir" rather than "output.base_dir").
    db_file = SanitizedRelFile(
        base=settings.get("output.manifest_for_sqlite_dir"),
        file_path="manifest.sqlite"
    )
    db_file.mkdirs()
    init(db_file.absolute())
def _upgrade_file(self, new_file, old_file):
    """Repoint every URL row from `old_file` to `new_file`, then remove the
    superseded file from disk if it still exists.

    :param new_file: The File record to keep.
    :param old_file: The File record being replaced.
    """
    (self._session.query(URL)
        .filter(URL.file_id == old_file.id)
        .update({URL.file_id: new_file.id}))
    stale = SanitizedRelFile(base=settings.get("output.base_dir"), file_path=old_file.path)
    if stale.is_file():
        stale.delete_file()
def api_get_oauth_url():
    """Build the Reddit OAuth URL for the UI.

    :return: Dict with 'url' (or False) and an explanatory 'message' when the UI
        is not running on the port the premade App expects.
    """
    expected_port = 7505
    if settings.get('interface.port') == expected_port:
        return {'url': praw_wrapper.get_reddit_token_url(), 'message': ''}
    msg = 'The UI is not using the default port (%s), and cannot use the premade App to authenticate!' % expected_port
    return {'url': False, 'message': msg}
def _req_args():
    """ Settings all Requests should use. """
    agent = settings.get('auth.user_agent')
    return dict(
        headers={'User-Agent': agent},
        timeout=10,
        allow_redirects=True,
    )
def wait_refresh_rate(self):
    """ Waits for the "refresh delay" configured in the settings,
    or exits early if processing finished before then.
    :return: True if the delay was fully awaited, or False if processing has completed.
    """
    stop_event = self.loader.get_stop_event()
    if stop_event.is_set():
        return False
    stop_event.wait(settings.get("threading.display_refresh_rate"))
    return True
def api_get_oauth_url():
    """Build the Reddit OAuth URL for the UI.

    :return: Dict with 'url' (or False) and an explanatory 'message' when the UI
        is not running on the port the Web App expects.
    """
    expected_port = 7505
    if settings.get('interface.port') == expected_port:
        return {'url': praw_wrapper.get_reddit_token_url(), 'message': ''}
    msg = 'The UI is not using the default port (%s), and cannot use the Web App to authenticate! ' \
        'Run RMD with "--authorize" to manually authenticate!' % expected_port
    return {'url': False, 'message': msg}
def _authorize_rmd_token():
    """Handle the OAuth redirect: verify the `state` nonce, exchange the code for
    a refresh token, and persist it.

    :return: An HTML status string for the browser.
    """
    query = eel.btl.request.query
    state = query.state
    print('New refresh code request: ', state, query.code)
    if state.strip() == settings.get('auth.oauth_key').strip():
        print('Saving new reddit code.')
        refresh = praw_wrapper.get_refresh_token(query.code)
        if refresh:
            settings.put('auth.refresh_token', refresh)
            return 'Saved authorization token! Close this page to continue.'
    return 'Cannot save the new auth key, something went wrong.<br><a href="../index.html">Back</a>'
def setUp(self):
    """One-time setup: run a full terminal download pass, then open a DB session.

    Module-level globals ensure the (expensive) download runs only once per class.
    """
    global download_ran, session
    if download_ran:
        return
    download_ran = True
    settings.load(self.settings_file)
    TerminalUI().display()
    # The manifest lives apart from the downloads, in manifest_for_sqlite_dir.
    self.db_path = join(settings.get('output.manifest_for_sqlite_dir'), 'manifest.sqlite')
    sql.init_from_settings()
    session = sql.session()
def setUp(self):
    """One-time setup: run a full terminal download pass, then open a DB session.

    Module-level globals ensure the (expensive) download runs only once per class.
    """
    global download_ran, session
    if download_ran:
        return
    download_ran = True
    settings.load(self.settings_file)
    TerminalUI().display()
    self.db_path = join(settings.get('output.base_dir'), 'manifest.sqlite')
    sql.init_from_settings()
    session = sql.session()
def _downloaded_files():
    """ Allows the UI to request files RMD has scraped. In format: "./file?id=file_token" """
    token = eel.btl.request.query.id
    record = _session.query(sql.File).filter(sql.File.id == token).first()
    # NOTE(review): assumes the token always matches a File row; an unknown token
    # would raise AttributeError here — confirm callers guarantee valid ids.
    root = os.path.abspath(settings.get("output.base_dir"))
    response = eel.btl.static_file(record.path, root=root)
    # Force revalidation so the UI always sees freshly-downloaded files.
    response.set_header("Cache-Control", "public, max-age=0")
    return response
def _choose_base_name(post):
    """ Generate the base file name, missing any extensions. Respects a maximum absolute filepath length.
    :param post: An sql.Post object.
    :return: The RelFile generated, with the path variables inserted and formatted.
    :raises Exception: If no truncation of the formatted name fits under the path limit.
    """
    global _pattern_array
    dct = json.loads(json.dumps(post.__dict__, default=lambda o: None))  # Deep copy, safely removing any handles/refs.
    dct['created_date'] = datetime.fromtimestamp(dct['created_utc']).strftime('%Y-%m-%d')
    dct['created_time'] = datetime.fromtimestamp(dct['created_utc']).strftime('%H.%M.%S')
    if not _pattern_array:
        file_pattern = './%s' % settings.get('output.file_name_pattern').strip('/\\ .')
        _pattern_array = _parse_pattern(file_pattern, dct)
    max_len = 200
    length = min(max_len, max(len(str(dct[k])) for k in dct.keys()))
    # BUG FIX: this loop was `while max_len:` but never changed max_len, so when no
    # truncation fit it spun forever and the final raise was unreachable. Decrement
    # `length` down to zero instead, then fail explicitly.
    while length >= 0:
        output = SanitizedRelFile(base=settings.get("output.base_dir"), file_path=_build_str(dct, length))
        if len(output.absolute()) <= max_len:
            return output
        length -= 1
    raise Exception("Unable to name file properly! Filename is likely too long!")
def _choose_base_name(post):
    """ Generate the base file name, missing any extensions. Respects a maximum absolute filepath length.
    :param post: An sql.Post object.
    :return: The RelFile generated, with the path variables inserted and formatted.
    :raises Exception: If no truncation of the formatted name fits under the path limit.
    """
    global _pattern_array
    if not _pattern_array:
        file_pattern = './%s' % settings.get('output.file_name_pattern').strip('/\\ .')
        _pattern_array = _parse_pattern(file_pattern, post.__dict__)
    max_len = 200
    length = min(max_len, max(len(str(post.__dict__[k])) for k in post.__dict__.keys()))
    # BUG FIX: this loop was `while max_len:` but never changed max_len, so when no
    # truncation fit it spun forever and the final raise was unreachable. Decrement
    # `length` down to zero instead, then fail explicitly.
    while length >= 0:
        output = SanitizedRelFile(base=settings.get("output.base_dir"), file_path=_build_str(post.__dict__, length))
        if len(output.absolute()) <= max_len:
            return output
        length -= 1
    raise Exception("Unable to name file properly! Filename is likely too long!")
def __init__(self, source_patterns=None):
    """Build the controller: construct the loader, deduplicator, and downloader processes.

    :param source_patterns: Optional source filter patterns; defaults to all loaded sources.
    """
    super().__init__()
    self.daemon = False
    self.sources = source_patterns
    self.sources = self.load_sources()
    # initialize Loader
    self.loader = RedditLoader(sources=self.sources, settings_json=settings.to_json())
    self.deduplicator = Deduplicator(
        settings_json=settings.to_json(),
        stop_event=self.loader.get_stop_event())
    self._downloaders = self._create_downloaders()
    self._all_processes = [self.loader] + self._downloaders
    # The deduplicator only runs when enabled in settings.
    if settings.get('processing.deduplicate_files'):
        self._all_processes.append(self.deduplicator)
def _websocket_close(page, old_websockets):
    """Handle a UI websocket disconnect: if no clients reconnect within ~4 seconds
    and keep_open is disabled, shut the server (and controller) down.

    :param page: The page whose socket closed (for logging).
    :param old_websockets: The sockets that just closed (for logging).
    """
    global stopped
    print('A WebUI just closed. Checking for other connections... (%s)[%s]' % (page, len(old_websockets)))
    # Poll briefly for a reconnect (40 * 0.1s).
    for _ in range(40):
        eel.sleep(.1)
        # noinspection PyProtectedMember
        if len(eel._websockets) > 0:
            print('Open connections still exist. Not stopping UI server.')
            return
    if settings.get('interface.keep_open'):
        print('Keeping UI server open...')
        return
    print('WebUI keep_open is disabled, and all open clients have closed. Exiting.')
    if _controller and _controller.is_alive():
        _controller.stop()
    stopped = True
def print_info(self, prog):
    """Render the current download progress to the terminal.

    :param prog: Progress aggregate exposing `loader` and `downloaders` views.
    """
    if settings.get('threading.console_clear_screen'):
        print('\n' * 10, colorama.ansi.clear_screen())
    plus = '+' if prog.loader.get_scanning() else ''
    print("Remaining: %s/%s%s" % (prog.loader.get_queue_size(), prog.loader.get_found(), plus))
    pad = 10
    for dl in prog.downloaders:
        print()
        print('File:', dl.get_file())
        print('Handler:'.rjust(pad), dl.get_handler())
        print('Status:'.rjust(pad), dl.get_status())
        if dl.get_percent():
            print('Percent:'.rjust(pad), '%s%%' % dl.get_percent())
        else:
            print()
def __init__(self, source_patterns=None):
    """Build the controller: migrate the database, then construct the loader,
    deduplicator, and downloader processes sharing one DB lock.

    :param source_patterns: Optional source filter patterns; defaults to all loaded sources.
    """
    super().__init__()
    sql.init_from_settings()  # Make sure the database is built & migrated before starting threads.
    sql.close()
    self.daemon = False
    self.sources = source_patterns
    self.sources = self.load_sources()
    self.db_lock = RLock()
    # initialize Loader
    self.loader = RedditLoader(sources=self.sources, settings_json=settings.to_json(), db_lock=self.db_lock)
    self.deduplicator = Deduplicator(
        settings_json=settings.to_json(),
        stop_event=self.loader.get_stop_event(),
        db_lock=self.db_lock
    )
    self._downloaders = self._create_downloaders()
    self._all_processes = [self.loader] + self._downloaders
    # The deduplicator only runs when enabled in settings.
    if settings.get('processing.deduplicate_files'):
        self._all_processes.append(self.deduplicator)
def init():
    """ Sets the credentials to sign in with. """
    global _reddit
    # A falsy stored token is treated the same as no token at all.
    refresh = settings.get('auth.refresh_token') or None
    host = settings.get('interface.host')
    port = settings.get('interface.port')
    _reddit = praw.Reddit(
        client_id=settings.get('auth.rmd_client_key'),
        client_secret=None,
        redirect_uri='http://%s:%s/authorize' % (host, port),
        user_agent=settings.get('auth.user_agent'),
        refresh_token=refresh)
def run(self, url, file):
    """Download the media at `url` via youtube-dl into `file`.

    :param url: The media page URL handed to youtube-dl.
    :param file: The target file; its extension is set from the downloaded temp file.
    :return: The `file`, renamed to its final path, on success.
    :raises YTDLError: If the download fails or yields an unknown filetype.
    """
    # Download under a hashed temp name so partial/mangled outputs can be located below.
    tmp_file = file.abs_hashed()
    tmp_hash = os.path.basename(tmp_file)
    file.mkdirs()
    ydl_opts = {
        'logger': Logger(),
        'progress_hooks': [self.ytdl_hook],
        'noplaylist': True,
        'outtmpl': tmp_file + '.%(ext)s',  # single_file only needs the extension.
        'http_headers': {'User-Agent': settings.get('auth.user_agent')},
        'socket_timeout': 10,
        'ffmpeg_location': ffmpeg_download.install_local()
    }
    failed = False
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            self.progress.set_status("Looking for video...")
            ydl.download([url])
    except Exception as ex:
        # "Unsupported URL" is expected for non-video links; only log other failures.
        if 'unsupported url' not in str(ex).lower():
            print('YTDL:', ex, '[%s]' % url, file=sys.stderr, flush=True)
        time.sleep(1)  # Give YTDL time to shut down before deleting file parts.
        failed = True
    # YTDL can mangle paths, so find the temp file it generated.
    tmp_file = glob.glob('%s/**/%s.*' % (file.absolute_base(), tmp_hash), recursive=True)
    if tmp_file:
        # Track every artifact so cleanup can remove them all on failure.
        for t in tmp_file:
            self.files.add(t)
        tmp_file = tmp_file[0]
    failed = failed or not tmp_file or any(str(f).endswith('.unknown_video') for f in self.files)
    if failed:
        # Remove all partial artifacts before raising.
        for f in self.files:
            if os.path.isfile(f):
                os.remove(f)
        raise YTDLError("YTDL Download filetype failure.")
    file.set_ext(str(tmp_file).split(".")[-1])
    os.rename(tmp_file, file.absolute())
    return file
def print_info(self, prog):
    """Render the current download progress, optionally clearing the console first.

    :param prog: Progress aggregate exposing `loader` and `downloaders` views.
    """
    if settings.get('threading.console_clear_screen'):
        try:
            print(colorama.ansi.clear_screen())
        except AttributeError:
            # Fall back to the platform clear command if colorama lacks ansi.clear_screen.
            os.system('cls' if os.name == 'nt' else 'clear')
    else:
        print('\n' * 10)
    plus = '+' if prog.loader.get_scanning() else ''
    print("Remaining: %s/%s%s" % (prog.loader.get_queue_size(), prog.loader.get_found(), plus))
    pad = 10
    for dl in prog.downloaders:
        print()
        print('File:', dl.get_file())
        print('Handler:'.rjust(pad), dl.get_handler())
        print('Status:'.rjust(pad), dl.get_status())
        if dl.get_percent():
            print('Percent:'.rjust(pad), '%s%%' % dl.get_percent())
        else:
            print()
def run(self, url, file):
    """Download the media at `url` via youtube-dl into `file`.

    :param url: The media page URL handed to youtube-dl.
    :param file: The target file; its extension is set from the downloaded temp file.
    :return: The `file`, renamed to its final path, on success.
    :raises Exception: If no temp file is produced or its filetype is unknown.
    """
    # Download under a hashed temp name so the output can be located below.
    tmp_file = file.abs_hashed()
    tmp_hash = os.path.basename(tmp_file)
    file.mkdirs()
    ydl_opts = {
        'logger': Logger(),
        'progress_hooks': [self.ytdl_hook],
        'noplaylist': True,
        'outtmpl': tmp_file + '.%(ext)s',  # single_file only needs the extension.
        'http_headers': {
            'User-Agent': settings.get('auth.user_agent')
        },
        'socket_timeout': 10,
        'ffmpeg_location': ffmpeg_download.install_local()
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        self.progress.set_status("Looking for video...")
        ydl.download([url])
    # YTDL can mangle paths, so find the temp file it generated.
    tmp_file = glob.glob('%s/**/%s.*' % (file.absolute_base(), tmp_hash), recursive=True)
    if tmp_file:
        tmp_file = tmp_file[0]
        self.files.add(tmp_file)
    failed = not tmp_file or any(
        str(f).endswith('.unknown_video') for f in self.files)
    if failed:
        # Remove all partial artifacts before raising.
        for f in self.files:
            if os.path.isfile(f):
                os.remove(f)
        raise Exception("YTDL Download filetype failure.")
    file.set_ext(str(tmp_file).split(".")[-1])
    os.rename(tmp_file, file.absolute())
    return file