def _choose_best_file(self, files):
     """Pick the largest file on disk out of `files`.

     :param files: Iterable of File records, each with a `.path` attribute.
     :return: Tuple `(best, rest)` — the biggest file and all others, ordered
         largest-to-smallest.
     """
     def _disk_size(record):
         # Resolve the record's relative path against the output dir to stat it.
         return SanitizedRelFile(base=settings.get("output.base_dir"), file_path=record.path).size()
     ranked = sorted(files, key=_disk_size, reverse=True)
     return ranked[0], ranked[1:]
Example #2
0
	def test_put(self):
		""" Put should work as expected """
		# Store a value under its dotted "category.key" path, then read it
		# back using the category-keyword form of get().
		settings.put('auth.oauth_key', 'value')
		self.assertEqual(settings.get('oauth_key', cat='auth'), 'value', msg='Failed to set correct value!')

		# Settings are typed: assigning a non-boolean string to a boolean
		# setting must raise TypeError rather than silently coercing.
		with self.assertRaises(TypeError, msg="Failed to catch invalid value!"):
			settings.put('interface.start_server', 'invalid')
Example #3
0
    def load(self):
        """ Threaded loading of elements. """
        # Rebuild settings and database handles inside this thread/process
        # from the serialized JSON snapshot handed over at construction.
        settings.from_json(self.settings)
        sql.init_from_settings()
        self._session = sql.session()

        self.progress.set_scanning(True)

        retry_failed = settings.get('processing.retry_failed')

        # Query for all unhandled URLs, and submit them before scanning for new Posts.
        # NOTE: `retry_failed and sql.URL.failed == True` relies on Python
        # short-circuiting: when retry_failed is False the term collapses to
        # False (so only unprocessed URLs match); when True it contributes the
        # SQLAlchemy expression `URL.failed == True` to the OR filter.
        unfinished = self._session\
         .query(sql.URL)\
         .filter((sql.URL.processed == False) | (retry_failed and sql.URL.failed == True))\
         .all()
        self._push_url_list(unfinished)

        self._scan_sources()

        self.progress.set_scanning(False)
        # Wait for any remaining ACKS to come in, before closing the writing pipe.
        # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
        while len(self._open_ack) > 0 and not self._stop_event.is_set():
            self._handle_acks(timeout=0.5)
        print("Finished loading.")
        sql.close()
Example #4
0
    def load(self):
        """ Threaded loading of elements. """
        # Rebuild settings and database handles inside this thread/process.
        settings.from_json(self.settings)
        sql.init_from_settings()
        self._session = sql.session()
        t_start = datetime.now()  # vy: timing instrumentation
        print("Started loading.")  # vy: timing instrumentation
        self.progress.set_scanning(True)

        retry_failed = settings.get('processing.retry_failed')

        # Query for all unhandled URLs, and submit them before scanning for new Posts.
        # When retry_failed is enabled, failed URLs are also re-queued, EXCEPT
        # those whose failure reason contains '404' (permanently gone).
        # NOTE(review): this chains Python `and` with SQLAlchemy expressions;
        # with retry_failed falsy the whole term short-circuits to False.
        # Verify that truthiness evaluation of `sql.URL.failed` mid-chain
        # behaves as intended with the SQLAlchemy version in use.
        unfinished = self._session\
         .query(sql.URL)\
         .filter((sql.URL.processed == False) | \
          (retry_failed and sql.URL.failed and \
           sql.not_(sql.URL.failure_reason.contains('404'))))\
         .all()
        print("Loading %s unfinished urls" % len(unfinished))
        self._push_url_list(unfinished)

        self._scan_sources()

        self.progress.set_scanning(False)
        # Wait for any remaining ACKS to come in, before closing the writing pipe.
        # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
        while len(self._open_ack) > 0 and not self._stop_event.is_set():
            self._handle_acks(timeout=1.0, clear=True)
        print("Finished loading.")  # vy: timing instrumentation
        print("Elapsed time: %s" % str(datetime.now() - t_start))  # vy: timing instrumentation
        sql.close()
Example #5
0
def api_search_posts(fields, term):
	"""Search stored Posts for `term` across the given fields.

	:param fields: The Post fields to match against.
	:param term: Search text; surrounding '%' wildcards are stripped.
	:return: List of post-metadata dicts, each including its on-disk files.
		Posts with no existing downloaded files are omitted.
	"""
	results = []
	searcher = sql.PostSearcher(_session)
	for post in searcher.search_fields(fields, term.strip("%")):
		files = []
		for url in post.urls:
			if not url.file:
				print('Post URL Missing a File:', url)
				continue
			rel_file = SanitizedRelFile(base=settings.get("output.base_dir"), file_path=url.file.path)
			# Only expose files that actually exist on disk.
			if rel_file.is_file():
				files.append({'token': url.file.id, 'path': rel_file.absolute()})
		if not files:
			continue
		results.append({
			'reddit_id': post.reddit_id,
			'author': post.author,
			'type': post.type,
			'title': post.title,
			'body': post.body,
			'parent_id': post.parent_id,
			'subreddit': post.subreddit,
			'over_18': post.over_18,
			'created_utc': post.created_utc,
			'num_comments': post.num_comments,
			'score': post.score,
			'source_alias': post.source_alias,
			'files': files
		})
	return results
	def setUp(self):
		"""Spin up the WebUI server once for the whole test class.

		Loads the test settings, initializes the database, and starts the UI
		in a background daemon thread, failing fast if it never comes up.
		"""
		global download_ran, session, thread
		if not download_ran:
			download_ran = True
			self.wui = WebUI('test_version')
			# BUG FIX: load the test settings file BEFORE reading values from
			# it — the original computed db_path/url from stale defaults.
			settings.load(self.settings_file)
			settings.put('interface.start_server', True)
			self.db_path = join(settings.get('output.base_dir'), 'manifest.sqlite')
			self.url = 'http://%s:%s/index.html#' % (settings.get('interface.host'), settings.get('interface.port'))
			sql.init_from_settings()
			session = sql.session()
			thread = Thread(target=self.wui.display)
			# Thread.setDaemon() is deprecated (Python 3.10+); set the attribute.
			thread.daemon = True
			thread.start()
			self.assertTrue(self.wui.waitFor(10), msg='WebUI Failed to start!')
Example #7
0
def login():
	"""Authenticate the global Reddit client via OAuth.

	:raises ConnectionError: If no refresh token has been configured yet.
	"""
	global _user, _logged_in
	token = settings.get('auth.refresh_token')
	if not token:
		raise ConnectionError('Missing the Refresh Token from Reddit! Cannot auth.')
	stringutil.print_color('yellow', "Authenticating via OAuth...")
	# Fetching the current user forces PRAW to perform the actual auth round-trip.
	_user = _reddit.user.me()
	stringutil.print_color('yellow', "Authenticated as [%s]\n" % _user.name)
	_logged_in = True
Example #8
0
 def _create_downloaders(self):
     """Build one Downloader per configured concurrent-download slot.

     :return: List of Downloader processes wired to the loader's queues.
     """
     slot_count = settings.get('threading.concurrent_downloads')
     return [
         Downloader(reader=self.loader.get_reader(),
                    ack_queue=self.loader.get_ack_queue(),
                    settings_json=settings.to_json())
         for _ in range(slot_count)
     ]
Example #9
0
	def display(self):
		"""Start the UI server (at most once) and block until shutdown is flagged."""
		if started:
			# A server instance is already running; never start a second one.
			return False
		web_root = os.path.join(os.path.dirname(__file__), '../web/')
		file_root = os.path.abspath(settings.get("output.base_dir"))
		start(web_dir=web_root, file_dir=file_root, rmd_version=self.rmd_version)
		# Keep the calling thread alive until another thread sets `stopped`.
		while not stopped:
			eel.sleep(1)
Example #10
0
	def test_conversion(self):
		""" Settings should cast values correctly """
		# Integer setting: ints pass through; numeric strings are cast to int
		# (truncating any fractional part).
		s = settings.get('interface.port', full_obj=True)
		s.set(10)
		self.assertEqual(s.val(), 10, msg='Setting set() failed!')
		s.set('1337.1')
		self.assertEqual(s.val(), 1337, msg='Setting set() string->int failed!')

		# Boolean setting: yes/no-style strings and ints are coerced.
		s = settings.get('interface.start_server', full_obj=True)
		s.set('Yes')
		self.assertTrue(s.val(), msg='Failed to cast boolean True from "Yes".')
		s.set('n')
		self.assertFalse(s.val(), msg='Failed to cast boolean True from "n".')
		s.set(1)
		self.assertTrue(s.val(), msg='Failed to cast boolean True from 1.')
		# Unrecognized strings must raise rather than silently coerce.
		with self.assertRaises(TypeError, msg='Failed to catch Type error!'):
			s.set('fake')
Example #11
0
def init_from_settings():
    """ Builds the database file using the Settings currently loaded. """
    # The manifest deliberately lives in its own configured directory,
    # separate from the media download tree.
    manifest = SanitizedRelFile(
        base=settings.get("output.manifest_for_sqlite_dir"),
        file_path="manifest.sqlite")
    manifest.mkdirs()
    init(manifest.absolute())
Example #12
0
 def _upgrade_file(self, new_file, old_file):
     """Repoint every URL referencing `old_file` to `new_file`, then remove
     the superseded copy from disk if it still exists.
     """
     (self._session.query(URL)
      .filter(URL.file_id == old_file.id)
      .update({URL.file_id: new_file.id}))
     stale = SanitizedRelFile(base=settings.get("output.base_dir"),
                              file_path=old_file.path)
     if stale.is_file():
         stale.delete_file()
Example #13
0
def api_get_oauth_url():
    """Return the Reddit OAuth URL for the UI, or an explanatory message.

    The premade Reddit App only redirects back to the default port, so a
    URL is only produced when the UI is actually listening on it.
    """
    port = 7505
    url = False
    message = ''
    if settings.get('interface.port') == port:
        url = praw_wrapper.get_reddit_token_url()
    else:
        message = 'The UI is not using the default port (%s), and cannot use the premade App to authenticate!' % port
    return {'url': url, 'message': message}
def _req_args():
    """ Settings all Requests should use. """
    # User-Agent comes from settings so all outbound requests identify uniformly.
    args = {
        'timeout': 10,
        'allow_redirects': True,
    }
    args['headers'] = {'User-Agent': settings.get('auth.user_agent')}
    return args
Example #15
0
	def wait_refresh_rate(self):
		"""
		Waits for the "refresh delay" configured in the settings, or exits early if processing finished before then.
		:return: True if the delay was fully awaited, or False if processing has completed.
		"""
		if self.loader.get_stop_event().is_set():
			return False
		# BUG FIX: Event.wait() returns True when the event was set during the
		# wait (processing finished early). The original returned True
		# unconditionally, contradicting the documented contract above.
		stopped_early = self.loader.get_stop_event().wait(settings.get("threading.display_refresh_rate"))
		return not stopped_early
Example #16
0
def api_get_oauth_url():
    """Return the Reddit OAuth URL for the Web App, or an explanatory message.

    The Web App redirect only works on the default port; otherwise the user
    is told to authenticate manually.
    """
    port = 7505
    url = False
    message = ''
    if settings.get('interface.port') == port:
        url = praw_wrapper.get_reddit_token_url()
    else:
        message = 'The UI is not using the default port (%s), and cannot use the Web App to authenticate! ' \
            'Run RMD with "--authorize" to manually authenticate!' % port
    return {'url': url, 'message': message}
Example #17
0
def _authorize_rmd_token():
	"""OAuth redirect endpoint: validate the state and store the refresh token."""
	state = eel.btl.request.query.state
	print('New refresh code request: ', state, eel.btl.request.query.code)
	failure = 'Cannot save the new auth key, something went wrong.<br><a href="../index.html">Back</a>'
	# The state must round-trip unchanged, or the redirect isn't ours.
	if state.strip() != settings.get('auth.oauth_key').strip():
		return failure
	code = eel.btl.request.query.code
	print('Saving new reddit code.')
	refresh = praw_wrapper.get_refresh_token(code)
	if not refresh:
		return failure
	settings.put('auth.refresh_token', refresh)
	return 'Saved authorization token! Close this page to continue.'
Example #18
0
	def setUp(self):
		"""One-time full download run shared by the whole test class."""
		global download_ran, session
		if not download_ran:
			download_ran = True
			settings.load(self.settings_file)
			tui = TerminalUI()
			# Run the complete terminal download pass before any test executes.
			tui.display()
			# self.db_path = join(settings.get('output.base_dir'), 'manifest.sqlite')
			self.db_path = join(settings.get('output.manifest_for_sqlite_dir'), 'manifest.sqlite')		# This is part of the change to save manifest.sqlite to a different directory than the downloads
			sql.init_from_settings()
			session = sql.session()
Example #19
0
 def setUp(self):
     """One-time full download run shared by the whole test class."""
     global download_ran, session
     if not download_ran:
         download_ran = True
         settings.load(self.settings_file)
         tui = TerminalUI()
         # Run the complete terminal download pass before any test executes.
         tui.display()
         self.db_path = join(settings.get('output.base_dir'),
                             'manifest.sqlite')
         sql.init_from_settings()
         session = sql.session()
Example #20
0
def _downloaded_files():
    """ Allows the UI to request files RMD has scraped.
		In format: "./file?id=file_token"
	"""
    # Look up the File row for the ?id= token from the query string.
    token = eel.btl.request.query.id
    file_obj = _session.query(sql.File).filter(sql.File.id == token).first()
    # NOTE(review): file_obj may be None for an unknown token, which would
    # raise AttributeError below — confirm callers only send valid tokens.
    file_path = file_obj.path
    # static_file serves relative to the output dir, which also guards
    # against escaping that root.
    response = eel.btl.static_file(file_path,
                                   root=os.path.abspath(
                                       settings.get("output.base_dir")))
    # max-age=0 forces revalidation so the UI always sees fresh files.
    response.set_header("Cache-Control", "public, max-age=0")
    return response
Example #21
0
def _choose_base_name(post):
	"""
	Generate the base file name, missing any extensions. Respects a maximum absolute filepath length.
	:param post: An sql.Post object.
	:return: The RelFile generated, with the path variables inserted and formatted.
	:raises Exception: If no length truncation produces a short-enough path.
	"""
	global _pattern_array
	dct = json.loads(json.dumps(post.__dict__, default=lambda o: None))  # Deep copy, safely removing any handles/refs.
	dct['created_date'] = datetime.fromtimestamp(dct['created_utc']).strftime('%Y-%m-%d')
	dct['created_time'] = datetime.fromtimestamp(dct['created_utc']).strftime('%H.%M.%S')
	if not _pattern_array:
		file_pattern = './%s' % settings.get('output.file_name_pattern').strip('/\\ .')
		_pattern_array = _parse_pattern(file_pattern, dct)
	max_len = 200
	length = min(max_len, max(len(str(dct[k])) for k in dct.keys()))
	# BUG FIX: the loop was `while max_len:` with max_len never modified, so it
	# could spin forever and the Exception below was unreachable. Shrink the
	# per-field truncation length until the absolute path fits.
	while length > 0:
		output = SanitizedRelFile(base=settings.get("output.base_dir"), file_path=_build_str(dct, length))
		if len(output.absolute()) <= max_len:
			return output
		length -= 1
	raise Exception("Unable to name file properly! Filename is likely too long!")
def _choose_base_name(post):
    """
    Generate the base file name, missing any extensions. Respects a maximum
    absolute filepath length.
    :param post: An sql.Post object.
    :return: The RelFile generated, with the path variables inserted and formatted.
    :raises Exception: If no length truncation produces a short-enough path.
    """
    global _pattern_array
    if not _pattern_array:
        file_pattern = './%s' % settings.get('output.file_name_pattern').strip(
            '/\\ .')
        _pattern_array = _parse_pattern(file_pattern, post.__dict__)
    max_len = 200
    length = min(max_len,
                 max(len(str(post.__dict__[k])) for k in post.__dict__.keys()))
    # BUG FIX: the loop was `while max_len:` with max_len never modified, so
    # it could spin forever and the Exception below was unreachable. Shrink
    # the per-field truncation length until the absolute path fits.
    while length > 0:
        output = SanitizedRelFile(base=settings.get("output.base_dir"),
                                  file_path=_build_str(post.__dict__, length))
        if len(output.absolute()) <= max_len:
            return output
        length -= 1
    raise Exception(
        "Unable to name file properly! Filename is likely too long!")
Example #23
0
 def __init__(self, source_patterns=None):
     """Wire up the loader, deduplicator and downloader processes.

     :param source_patterns: Optional patterns used to select which Sources run.
     """
     super().__init__()
     self.daemon = False
     self.sources = source_patterns
     # load_sources() resolves the patterns (or all sources when None).
     self.sources = self.load_sources()
     # initialize Loader
     self.loader = RedditLoader(sources=self.sources,
                                settings_json=settings.to_json())
     self.deduplicator = Deduplicator(
         settings_json=settings.to_json(),
         stop_event=self.loader.get_stop_event())
     self._downloaders = self._create_downloaders()
     self._all_processes = [self.loader, *self._downloaders]
     # Deduplication is optional; only schedule the process when enabled.
     if settings.get('processing.deduplicate_files'):
         self._all_processes.append(self.deduplicator)
Example #24
0
def _websocket_close(page, old_websockets):
	"""Eel callback fired when a WebUI socket closes; stops the server when
	no clients remain and keep_open is disabled.
	"""
	global stopped
	print('A WebUI just closed. Checking for other connections... (%s)[%s]' % (page, len(old_websockets)))
	# Poll for up to ~4 seconds (40 * 0.1s) so a page reload can reconnect
	# before we treat the UI as abandoned.
	for i in range(40):
		eel.sleep(.1)
		# noinspection PyProtectedMember
		if len(eel._websockets) > 0:
			print('Open connections still exist. Not stopping UI server.')
			return
	if not settings.get('interface.keep_open'):
		print('WebUI keep_open is disabled, and all open clients have closed. Exiting.')
		# Stop the download controller first, if it is still running.
		if _controller and _controller.is_alive():
			_controller.stop()
		stopped = True
	else:
		print('Keeping UI server open...')
Example #25
0
 def print_info(self, prog):
     """Render a one-shot console status report from the progress tracker."""
     if settings.get('threading.console_clear_screen'):
         # Push old output out of view, then emit the ANSI clear-screen code.
         print('\n' * 10, colorama.ansi.clear_screen())
     # '+' marks that the loader is still actively scanning for more posts.
     scanning = '+' if prog.loader.get_scanning() else ''
     print(
         "Remaining: %s/%s%s" %
         (prog.loader.get_queue_size(), prog.loader.get_found(), scanning))
     rj = 10  # Right-justify width for the label column.
     for progress in prog.downloaders:
         print()
         print('File:', progress.get_file())
         print('Handler:'.rjust(rj), progress.get_handler())
         print('Status:'.rjust(rj), progress.get_status())
         if progress.get_percent():
             print('Percent:'.rjust(rj), '%s%%' % progress.get_percent())
         else:
             print()
Example #26
0
	def __init__(self, source_patterns=None):
		"""Wire up the loader, deduplicator and downloader processes.

		:param source_patterns: Optional patterns used to select which Sources run.
		"""
		super().__init__()
		sql.init_from_settings()  # Make sure the database is built & migrated before starting threads.
		sql.close()
		self.daemon = False
		self.sources = source_patterns
		# load_sources() resolves the patterns (or all sources when None).
		self.sources = self.load_sources()
		# Shared lock serializing database access across child processes.
		self.db_lock = RLock()
		# initialize Loader
		self.loader = RedditLoader(sources=self.sources, settings_json=settings.to_json(), db_lock=self.db_lock)
		self.deduplicator = Deduplicator(
			settings_json=settings.to_json(),
			stop_event=self.loader.get_stop_event(),
			db_lock=self.db_lock
		)
		self._downloaders = self._create_downloaders()
		self._all_processes = [self.loader, *self._downloaders]
		# Deduplication is optional; only schedule the process when enabled.
		if settings.get('processing.deduplicate_files'):
			self._all_processes.append(self.deduplicator)
Example #27
0
def init():
    """Create the global PRAW Reddit client from the currently-loaded settings.

    Uses the stored refresh token when one exists, otherwise starts
    unauthenticated (ready for the OAuth redirect flow).
    """
    global _reddit
    # Falsy (empty/missing) token must be passed through as None.
    refresh = settings.get('auth.refresh_token') or None
    _reddit = praw.Reddit(
        client_id=settings.get('auth.rmd_client_key'),
        client_secret=None,
        redirect_uri='http://%s:%s/authorize' %
        (settings.get('interface.host'), settings.get('interface.port')),
        user_agent=settings.get('auth.user_agent'),
        refresh_token=refresh)
Example #28
0
	def run(self, url, file):
		"""Download `url` via youtube-dl into `file`.

		:param url: The media URL to attempt.
		:param file: A RelFile-like target; its extension is set from the result.
		:return: The populated `file` on success.
		:raises YTDLError: If the download failed or produced an unusable filetype.
		"""
		# Download under a hashed temp name; YTDL appends its own extension.
		tmp_file = file.abs_hashed()
		tmp_hash = os.path.basename(tmp_file)
		file.mkdirs()
		ydl_opts = {
			'logger': Logger(),
			'progress_hooks': [self.ytdl_hook],
			'noplaylist': True,
			'outtmpl': tmp_file + '.%(ext)s',  # single_file only needs the extension.
			'http_headers': {'User-Agent': settings.get('auth.user_agent')},
			'socket_timeout': 10,
			'ffmpeg_location': ffmpeg_download.install_local()
		}
		failed = False
		try:
			with youtube_dl.YoutubeDL(ydl_opts) as ydl:
				self.progress.set_status("Looking for video...")
				ydl.download([url])
		except Exception as ex:
			# "Unsupported URL" is expected (another handler may claim it);
			# anything else is logged before flagging the failure.
			if 'unsupported url' not in str(ex).lower():
				print('YTDL:', ex, '[%s]' % url, file=sys.stderr, flush=True)
				time.sleep(1)  # Give YTDL time to shut down before deleting file parts.
			failed = True

		# YTDL can mangle paths, so find the temp file it generated.
		tmp_file = glob.glob('%s/**/%s.*' % (file.absolute_base(), tmp_hash), recursive=True)
		if tmp_file:
			# Track every matching artifact so failures can be cleaned up.
			for t in tmp_file:
				self.files.add(t)
			tmp_file = tmp_file[0]

		# No output file, or an '.unknown_video' artifact, also counts as failure.
		failed = failed or not tmp_file or any(str(f).endswith('.unknown_video') for f in self.files)
		if failed:
			# Remove any partial artifacts before reporting the error.
			for f in self.files:
				if os.path.isfile(f):
					os.remove(f)
			raise YTDLError("YTDL Download filetype failure.")

		# Adopt the extension YTDL chose, then move the temp file into place.
		file.set_ext(str(tmp_file).split(".")[-1])
		os.rename(tmp_file, file.absolute())
		return file
Example #29
0
 def print_info(self, prog):
     """Render a one-shot console status report from the progress tracker."""
     if not settings.get('threading.console_clear_screen'):
         # Clearing disabled: just push old output out of view.
         print('\n' * 10)
     else:
         try:
             print(colorama.ansi.clear_screen())
         except AttributeError:
             # Older colorama without ansi.clear_screen: fall back to the
             # platform's native clear command.
             os.system('cls' if os.name == 'nt' else 'clear')
     # '+' marks that the loader is still actively scanning for more posts.
     scanning = '+' if prog.loader.get_scanning() else ''
     print(
         "Remaining: %s/%s%s" %
         (prog.loader.get_queue_size(), prog.loader.get_found(), scanning))
     rj = 10  # Right-justify width for the label column.
     for progress in prog.downloaders:
         print()
         print('File:', progress.get_file())
         print('Handler:'.rjust(rj), progress.get_handler())
         print('Status:'.rjust(rj), progress.get_status())
         if progress.get_percent():
             print('Percent:'.rjust(rj), '%s%%' % progress.get_percent())
         else:
             print()
Example #30
0
    def run(self, url, file):
        """Download `url` via youtube-dl into `file`.

        Unlike other variants, YTDL exceptions propagate to the caller here.

        :param url: The media URL to attempt.
        :param file: A RelFile-like target; its extension is set from the result.
        :return: The populated `file` on success.
        :raises Exception: If no usable output file was produced.
        """
        # Download under a hashed temp name; YTDL appends its own extension.
        tmp_file = file.abs_hashed()
        tmp_hash = os.path.basename(tmp_file)
        file.mkdirs()
        ydl_opts = {
            'logger': Logger(),
            'progress_hooks': [self.ytdl_hook],
            'noplaylist': True,
            'outtmpl':
            tmp_file + '.%(ext)s',  # single_file only needs the extension.
            'http_headers': {
                'User-Agent': settings.get('auth.user_agent')
            },
            'socket_timeout': 10,
            'ffmpeg_location': ffmpeg_download.install_local()
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            self.progress.set_status("Looking for video...")
            ydl.download([url])

        # YTDL can mangle paths, so find the temp file it generated.
        tmp_file = glob.glob('%s/**/%s.*' % (file.absolute_base(), tmp_hash),
                             recursive=True)
        if tmp_file:
            # Track the artifact so a later failure can clean it up.
            tmp_file = tmp_file[0]
            self.files.add(tmp_file)

        # No output file, or an '.unknown_video' artifact, counts as failure.
        failed = not tmp_file or any(
            str(f).endswith('.unknown_video') for f in self.files)
        if failed:
            # Remove any partial artifacts before reporting the error.
            for f in self.files:
                if os.path.isfile(f):
                    os.remove(f)
            raise Exception("YTDL Download filetype failure.")

        # Adopt the extension YTDL chose, then move the temp file into place.
        file.set_ext(str(tmp_file).split(".")[-1])
        os.rename(tmp_file, file.absolute())
        return file